nb 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/Gemfile.ci +11 -0
- data/README.md +41 -3
- data/lib/nb/naive_bayes.rb +8 -0
- data/lib/nb/version.rb +1 -1
- data/spec/nb/naive_bayes_spec.rb +20 -0
- data/spec/spec_helper.rb +6 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e163cd57a340190efef6d152493f3cbf5f0ee6c7
|
4
|
+
data.tar.gz: 3dc32f2eaf69f8cda5fe9672bdd54c4c51d47a57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb8c29aa398fdda618c7c624146435a5d38386f4698f4a4262c5178826089e50568b5eb213af53f7f1de48d5e73f93e5fad9cbf47868403f4a522350f03c1b73
|
7
|
+
data.tar.gz: 3e3bc9a7f6371ad037fb62a1cf3a5cbeb747aa958bea96c8a14879174a3bfd754a7580612961916f6be926b9cc56a80be28add65572e8ad376ce274caf1a1fa5
|
data/.travis.yml
ADDED
data/Gemfile.ci
ADDED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#
|
1
|
+
# nb
|
2
2
|
|
3
|
-
|
3
|
+
[Code Climate](https://codeclimate.com/github/forresty/nb)
|
4
|
+
[Build Status](https://travis-ci.org/forresty/nb)
|
5
|
+
|
6
|
+
yet another Naive Bayes library
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -20,7 +23,42 @@ Or install it yourself as:
|
|
20
23
|
|
21
24
|
## Usage
|
22
25
|
|
23
|
-
|
26
|
+
```ruby
|
27
|
+
bayes = NaiveBayes.new :love, :hate
|
28
|
+
|
29
|
+
bayes.train :love, 'I', 'love', 'you'
|
30
|
+
bayes.train :hate, 'I', 'hate', 'you'
|
31
|
+
|
32
|
+
bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
|
33
|
+
bayes.classify(*%w{ I love you }).should == [:love, 0.5]
|
34
|
+
bayes.classify(*%w{ love }).should == [:love, 0.5]
|
35
|
+
```
|
36
|
+
|
37
|
+
### ability to view top tokens
|
38
|
+
|
39
|
+
`bayes.top_tokens_of_category(:spam)`
|
40
|
+
|
41
|
+
```
|
42
|
+
+------------+------+--------------------+
|
43
|
+
| 学生 | 1966 | 0.9995149465854383 |
|
44
|
+
| 多劳多得 | 1953 | 0.999511719439795 |
|
45
|
+
| 党 | 1517 | 0.9993714712416684 |
|
46
|
+
| 结 | 1327 | 0.9992815430836995 |
|
47
|
+
| 工资 | 1213 | 0.9992140742313297 |
|
48
|
+
| 不等 | 1135 | 0.999160108836817 |
|
49
|
+
| 诚聘 | 1107 | 0.9991388832706672 |
|
50
|
+
| 咨询 | 1095 | 0.9991294545902496 |
|
51
|
+
| 加入 | 1071 | 0.9991099639327047 |
|
52
|
+
| 限制 | 1046 | 0.9990887109454397 |
|
53
|
+
| 50 | 1041 | 0.9990843379645474 |
|
54
|
+
| 上网 | 1020 | 0.9990655037161098 |
|
55
|
+
| 流动资金 | 952 | 0.9989988208099915 |
|
56
|
+
| 曰 | 902 | 0.9989433817121107 |
|
57
|
+
| 办公室 | 861 | 0.9988931222482719 |
|
58
|
+
| 职员 | 827 | 0.9988476682254364 |
|
59
|
+
| 绝对 | 823 | 0.9988420740701035 |
|
60
|
+
+------------+------+--------------------+
|
61
|
+
```
|
24
62
|
|
25
63
|
## Contributing
|
26
64
|
|
data/lib/nb/naive_bayes.rb
CHANGED
@@ -33,6 +33,14 @@ class NaiveBayes
|
|
33
33
|
scores.sort_by { |k, v| -v }
|
34
34
|
end
|
35
35
|
|
36
|
+
def top_tokens_of_category(category, count=20)
|
37
|
+
tokens_count[category].map { |k, v| [k, v, probability_of_a_token_in_category(k, category)] }.sort_by { |i| -i.last }.first(count)
|
38
|
+
end
|
39
|
+
|
40
|
+
def probability_of_a_token_in_category(token, category)
|
41
|
+
probability_of_a_token_given_a_category(token, category) / @categories.inject(0.0) { |r, c| r + probability_of_a_token_given_a_category(token, c) }
|
42
|
+
end
|
43
|
+
|
36
44
|
def probability_of_a_token_given_a_category(token, category)
|
37
45
|
return assumed_probability if @tokens_count[category][token] == 0
|
38
46
|
|
data/lib/nb/version.rb
CHANGED
data/spec/nb/naive_bayes_spec.rb
CHANGED
@@ -8,8 +8,10 @@ describe NaiveBayes do
|
|
8
8
|
it { should respond_to :probability_of_a_token_given_a_category }
|
9
9
|
it { should respond_to :probability_of_tokens_given_a_category }
|
10
10
|
it { should respond_to :probability_of_a_category }
|
11
|
+
it { should respond_to :probability_of_a_token_in_category }
|
11
12
|
# it { should respond_to :total_number_of_tokens }
|
12
13
|
it { should respond_to :total_number_of_items }
|
14
|
+
it { should respond_to :top_tokens_of_category }
|
13
15
|
|
14
16
|
let(:bayes) { NaiveBayes.new(:love, :hate) }
|
15
17
|
subject { bayes }
|
@@ -27,6 +29,24 @@ describe NaiveBayes do
|
|
27
29
|
# end
|
28
30
|
# end
|
29
31
|
|
32
|
+
describe '#probability_of_a_token_in_category' do
|
33
|
+
it 'calculates correctly' do
|
34
|
+
bayes.train :love, 'I', 'love', 'you'
|
35
|
+
bayes.train :hate, 'I', 'hate', 'you'
|
36
|
+
|
37
|
+
bayes.probability_of_a_token_in_category('love', :love).should == 2.0/3 # 1 / ( 1 + 0.5 )
|
38
|
+
bayes.probability_of_a_token_in_category('hate', :love).should == 1.0/3 # 0.5 / ( 1 + 0.5 )
|
39
|
+
bayes.probability_of_a_token_in_category('I', :love).should == 0.5
|
40
|
+
|
41
|
+
bayes.train :love, 'hate', 'is', 'love'
|
42
|
+
bayes.train :love, 'hate', 'is', 'love'
|
43
|
+
bayes.train :love, 'hate', 'is', 'love'
|
44
|
+
|
45
|
+
bayes.probability_of_a_token_in_category('love', :love).should == 5.0/6 # 1 / ( 1 + 0.2 )
|
46
|
+
bayes.probability_of_a_token_in_category('hate', :love).should == 3.0/7 # 0.75 / ( 0.75 + 1 )
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
30
50
|
describe '#total_number_of_items' do
|
31
51
|
it 'calculates correctly' do
|
32
52
|
bayes.train :love, 'I', 'love', 'you'
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Forrest Ye
|
@@ -46,7 +46,9 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- Gemfile
|
51
|
+
- Gemfile.ci
|
50
52
|
- Guardfile
|
51
53
|
- LICENSE.txt
|
52
54
|
- README.md
|