yanbi-ml 0.2.1 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/lib/bayes/bayes.rb CHANGED
@@ -20,6 +20,7 @@ module Yanbi
20
20
  @categories = categories
21
21
  @category_counts = {}
22
22
  @document_counts = {}
23
+ @category_sizes = {}
23
24
 
24
25
  @categories.each do |category|
25
26
  cat = category.to_sym
@@ -54,11 +55,13 @@ module Yanbi
54
55
  @category_counts[cat][word] ||= 0
55
56
  @category_counts[cat][word] += 1
56
57
  end
58
+
59
+ @category_sizes[cat] = category_size(cat)
57
60
  end
58
61
 
59
62
  def classify(document)
60
63
  max_score(document) do |cat, doc|
61
- cond_prob(cat, doc)
64
+ score(cat, doc)
62
65
  end
63
66
  end
64
67
 
@@ -75,6 +78,7 @@ module Yanbi
75
78
  categories.each do |category|
76
79
  cat = category.to_sym
77
80
  @category_counts[cat].reject! {|k,v| v < cutoff}
81
+ @category_sizes[cat] = category_size(cat)
78
82
  end
79
83
  end
80
84
 
@@ -84,16 +88,15 @@ module Yanbi
84
88
 
85
89
  private
86
90
 
87
- def cond_prob(cat, document)
91
+ def score(cat, document)
88
92
  total_docs = @document_counts.values.reduce(:+).to_f
89
93
  document_prob = document.words.uniq.map {|word| word_prob(cat, word)}.reduce(:+)
90
94
  document_prob + Math.log(@document_counts[cat] / total_docs)
91
95
  end
92
96
 
93
97
  def word_prob(cat, word)
94
- all_word_count = @category_counts[cat].values.reduce(&:+)
95
98
  count = @category_counts[cat].has_key?(word) ? @category_counts[cat][word].to_f : 0.1
96
- Math.log(count / all_word_count)
99
+ Math.log(count / @category_sizes[cat])
97
100
  end
98
101
 
99
102
  def max_score(document)
@@ -107,7 +110,10 @@ module Yanbi
107
110
  i = scores.rindex(scores.max)
108
111
  @categories[i]
109
112
  end
110
-
113
+
114
+ def category_size(cat)
115
+ @category_counts[cat].values.reduce(&:+).to_i
116
+ end
111
117
  end
112
118
 
113
119
  end
data/lib/bayes/fisher.rb CHANGED
@@ -5,16 +5,10 @@
5
5
  module Yanbi
6
6
 
7
7
  class Fisher < Bayes
8
-
9
- def classify(text)
10
- max_score(text) do |cat, doc|
11
- fisher_score(cat, doc)
12
- end
13
- end
14
-
8
+
15
9
  private
16
10
 
17
- def fisher_score(category, document)
11
+ def score(category, document)
18
12
  features = document.words.uniq
19
13
  probs = features.map {|x| weighted_prob(x, category)}
20
14
  pscores = probs.reduce(&:*)
@@ -30,9 +24,8 @@ module Yanbi
30
24
  end
31
25
 
32
26
  def word_prob(cat, word)
33
- all_word_count = @category_counts[cat].values.reduce(&:+)
34
27
  count = @category_counts[cat].has_key?(word) ? @category_counts[cat][word].to_f : 0
35
- count / all_word_count
28
+ count / @category_sizes[cat]
36
29
  end
37
30
 
38
31
  def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
data/lib/version.rb CHANGED
@@ -3,5 +3,5 @@
3
3
  # License:: MIT
4
4
 
5
5
  module Yanbi
6
- VERSION = "0.2.1"
6
+ VERSION = "0.2.2"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanbi-ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: