fisher_classifier 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OGYyYTEwNDI3NjdhNjYwNmYxMTllYzhlZWJiNDE1ODhjNGQ5OGE0Yw==
4
+ ZmMyNjg5M2FhZmM5YjBhNTRhNzQ1Y2U0ZGNlZTY1MjgyZTRlMjc0YQ==
5
5
  data.tar.gz: !binary |-
6
- NjAzYTczODQzZWIwZjhjNTRjM2I1OTFiMTk2MDk1ZjMxNWI4ODEyMg==
6
+ MTQ4M2E0YjgwMzUwZGU2NGIwNjAwZGQwNzZhODdlNWYwYjUyZWFkMQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NWQ3NjQyN2U1MzI4MjExMzIyNjAwNDFiYzQxMmMyMTdjMTNjNTE0N2FmMDU3
10
- OWQ2NDc5OGIxMzVlOGZjNDUwMzY2MTliNjg5YjU2ZTkxZWYwYTUwM2QxZTgy
11
- M2FhOWVhYWYwZjgzZTI5ZTM4YWYzMThmMDZlYzE0YjEwMGM2N2Q=
9
+ M2M0NGI4Njg1MjdkZDRiZWJiYmJiYTJlNTE3YmY5YTM3MWEyNTQwMGVkOTlm
10
+ OGRiNDgzNTIyNjZiMWFkZGFkM2Q2ZDJkOTgyNDdlMjc5OGNkOTMxNWEyMTM4
11
+ YjNhMWIzYWIxMmM0NWEyZjFmNzVkNjczODcxMjQ0YTlkN2ZhNDE=
12
12
  data.tar.gz: !binary |-
13
- NGMwNjFlZDEwZjlhZDZkNjY0M2E4MTJlY2Q3YTdhNzVhOGUzNmYwMWQ5MzA5
14
- OTIzOGRiMjZlNjZmMjE4YTIxYjg0ZjhkMTQwMjA3MjUxNTNhNTdkMGRkYzkw
15
- MTgwOWZmNTZkYWE3ZTg4Y2RjMDYxYWY1Y2ZiNGFlZDZmZWFkNmQ=
13
+ YTNlM2M5MzZlOWQyY2FmN2NjOTA0NzUzNTlhMTBhMDNhYWMwYjQwZGUyMjVl
14
+ OGZlM2Y1MWYyMzg5NTUyYjM2YWFhZTk4Zjg0MjViYWM5NGNhNGE1YWIyYjBk
15
+ OWE5MjFlZGY4YTY4YmExYzU5ZTIwNmYzMzQ5M2RkMmQzMTA3NTE=
@@ -17,7 +17,7 @@ module FisherClassifier
17
17
  def classify(text)
18
18
  features = get_features(text)
19
19
  best = default_category
20
- max = 0.0
20
+ max = fisher_threshold
21
21
 
22
22
  categories.each do |category|
23
23
  prob = fisher_prob(category, features)
@@ -41,16 +41,16 @@ module FisherClassifier
41
41
  )
42
42
  end
43
43
 
44
+ def fisher_factor(probs_multiply)
45
+ -2 * Math.log(probs_multiply)
46
+ end
47
+
44
48
  def probs_multiply(features, category)
45
49
  fprobs = features.map { |f| weighted_prob(f, category) }
46
50
  probs_multiply = fprobs.inject(:*)
47
51
  probs_multiply ||= 0
48
52
  end
49
53
 
50
- def fisher_factor(probs_multiply)
51
- -2 * Math.log(probs_multiply)
52
- end
53
-
54
54
  def feature_prob(feature, category)
55
55
  cc = category_count(category)
56
56
  return cc if cc.zero?
@@ -59,10 +59,10 @@ module FisherClassifier
59
59
  end
60
60
 
61
61
  def weighted_prob(feature, category)
62
- current_prob = category_prob(feature, category)
62
+ current_prob = category_prob(category, feature)
63
63
  totals = feature_in_all_categories(feature)
64
64
 
65
- (weight * ap + totals * current_prob) / ( weight + totals).to_f
65
+ (weight * assumed_prob + totals * current_prob) / ( weight + totals).to_f
66
66
  end
67
67
 
68
68
  def feature_in_all_categories(feature)
@@ -70,7 +70,7 @@ module FisherClassifier
70
70
  counts.inject(:+)
71
71
  end
72
72
 
73
- def category_prob(feature, category)
73
+ def category_prob(category, feature)
74
74
  fp = feature_prob(feature, category)
75
75
  return fp if fp.zero?
76
76
 
@@ -94,44 +94,13 @@ module FisherClassifier
94
94
  [sum, 1.0].min
95
95
  end
96
96
 
97
- def default_category
98
- @config.call(:default_category)
99
- end
100
-
101
- def category_threshold(category)
102
- @config.call(:category_threshold, category)
103
- end
104
-
105
- def weight
106
- @config.get(:weight)
107
- end
108
-
109
- def ap
110
- @config.get(:ap)
111
- end
112
-
113
- def get_features(text)
114
- @config.call(:get_features, text)
115
- end
116
-
117
- def categories
118
- @config.call(:categories)
119
- end
120
-
121
- def category_count(category)
122
- @config.call(:category_count, category)
123
- end
124
-
125
- def features_count(feature, category)
126
- @config.call(:features_count, feature, category)
127
- end
128
-
129
- def inc_feature(feature, category)
130
- @config.call :inc_feature, feature, category
97
+ def method_missing(key, *values, &block)
98
+ if @config.respond_to?(key, values)
99
+ @config.call key, *values
100
+ else
101
+ @config.get(key)
102
+ end
131
103
  end
132
104
 
133
- def inc_category(category)
134
- @config.call :inc_category, category
135
- end
136
105
  end
137
106
  end
@@ -3,8 +3,9 @@ module FisherClassifier
3
3
 
4
4
  def initialize(block)
5
5
  @config = {
6
+ fisher_threshold: 0,
6
7
  weight: 1.0,
7
- ap: 0.5
8
+ assumed_prob: 0.5
8
9
  }
9
10
  @methods = {}
10
11
  instance_eval &block
@@ -30,5 +31,9 @@ module FisherClassifier
30
31
  end
31
32
  end
32
33
 
34
+ def respond_to_missing?(method_name, include_private = false)
35
+ @methods.has_key? method_name
36
+ end
37
+
33
38
  end
34
39
  end
@@ -40,7 +40,7 @@ module FisherClassifier
40
40
  categories.map do |category|
41
41
  {
42
42
  name: category,
43
- category_prob: category_prob(feature, category),
43
+ category_prob: category_prob(category, feature),
44
44
  feature_prob: feature_prob(feature, category),
45
45
  weighted_prob: weighted_prob(feature, category),
46
46
  freqsum: feature_freqsum(feature, category)
@@ -1,3 +1,3 @@
1
1
  module FisherClassifier
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fisher_classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew8xx8