fisher_classifier 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OGYyYTEwNDI3NjdhNjYwNmYxMTllYzhlZWJiNDE1ODhjNGQ5OGE0Yw==
4
+ ZmMyNjg5M2FhZmM5YjBhNTRhNzQ1Y2U0ZGNlZTY1MjgyZTRlMjc0YQ==
5
5
  data.tar.gz: !binary |-
6
- NjAzYTczODQzZWIwZjhjNTRjM2I1OTFiMTk2MDk1ZjMxNWI4ODEyMg==
6
+ MTQ4M2E0YjgwMzUwZGU2NGIwNjAwZGQwNzZhODdlNWYwYjUyZWFkMQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NWQ3NjQyN2U1MzI4MjExMzIyNjAwNDFiYzQxMmMyMTdjMTNjNTE0N2FmMDU3
10
- OWQ2NDc5OGIxMzVlOGZjNDUwMzY2MTliNjg5YjU2ZTkxZWYwYTUwM2QxZTgy
11
- M2FhOWVhYWYwZjgzZTI5ZTM4YWYzMThmMDZlYzE0YjEwMGM2N2Q=
9
+ M2M0NGI4Njg1MjdkZDRiZWJiYmJiYTJlNTE3YmY5YTM3MWEyNTQwMGVkOTlm
10
+ OGRiNDgzNTIyNjZiMWFkZGFkM2Q2ZDJkOTgyNDdlMjc5OGNkOTMxNWEyMTM4
11
+ YjNhMWIzYWIxMmM0NWEyZjFmNzVkNjczODcxMjQ0YTlkN2ZhNDE=
12
12
  data.tar.gz: !binary |-
13
- NGMwNjFlZDEwZjlhZDZkNjY0M2E4MTJlY2Q3YTdhNzVhOGUzNmYwMWQ5MzA5
14
- OTIzOGRiMjZlNjZmMjE4YTIxYjg0ZjhkMTQwMjA3MjUxNTNhNTdkMGRkYzkw
15
- MTgwOWZmNTZkYWE3ZTg4Y2RjMDYxYWY1Y2ZiNGFlZDZmZWFkNmQ=
13
+ YTNlM2M5MzZlOWQyY2FmN2NjOTA0NzUzNTlhMTBhMDNhYWMwYjQwZGUyMjVl
14
+ OGZlM2Y1MWYyMzg5NTUyYjM2YWFhZTk4Zjg0MjViYWM5NGNhNGE1YWIyYjBk
15
+ OWE5MjFlZGY4YTY4YmExYzU5ZTIwNmYzMzQ5M2RkMmQzMTA3NTE=
@@ -17,7 +17,7 @@ module FisherClassifier
17
17
  def classify(text)
18
18
  features = get_features(text)
19
19
  best = default_category
20
- max = 0.0
20
+ max = fisher_threshold
21
21
 
22
22
  categories.each do |category|
23
23
  prob = fisher_prob(category, features)
@@ -41,16 +41,16 @@ module FisherClassifier
41
41
  )
42
42
  end
43
43
 
44
+ def fisher_factor(probs_multiply)
45
+ -2 * Math.log(probs_multiply)
46
+ end
47
+
44
48
  def probs_multiply(features, category)
45
49
  fprobs = features.map { |f| weighted_prob(f, category) }
46
50
  probs_multiply = fprobs.inject(:*)
47
51
  probs_multiply ||= 0
48
52
  end
49
53
 
50
- def fisher_factor(probs_multiply)
51
- -2 * Math.log(probs_multiply)
52
- end
53
-
54
54
  def feature_prob(feature, category)
55
55
  cc = category_count(category)
56
56
  return cc if cc.zero?
@@ -59,10 +59,10 @@ module FisherClassifier
59
59
  end
60
60
 
61
61
  def weighted_prob(feature, category)
62
- current_prob = category_prob(feature, category)
62
+ current_prob = category_prob(category, feature)
63
63
  totals = feature_in_all_categories(feature)
64
64
 
65
- (weight * ap + totals * current_prob) / ( weight + totals).to_f
65
+ (weight * assumed_prob + totals * current_prob) / ( weight + totals).to_f
66
66
  end
67
67
 
68
68
  def feature_in_all_categories(feature)
@@ -70,7 +70,7 @@ module FisherClassifier
70
70
  counts.inject(:+)
71
71
  end
72
72
 
73
- def category_prob(feature, category)
73
+ def category_prob(category, feature)
74
74
  fp = feature_prob(feature, category)
75
75
  return fp if fp.zero?
76
76
 
@@ -94,44 +94,13 @@ module FisherClassifier
94
94
  [sum, 1.0].min
95
95
  end
96
96
 
97
- def default_category
98
- @config.call(:default_category)
99
- end
100
-
101
- def category_threshold(category)
102
- @config.call(:category_threshold, category)
103
- end
104
-
105
- def weight
106
- @config.get(:weight)
107
- end
108
-
109
- def ap
110
- @config.get(:ap)
111
- end
112
-
113
- def get_features(text)
114
- @config.call(:get_features, text)
115
- end
116
-
117
- def categories
118
- @config.call(:categories)
119
- end
120
-
121
- def category_count(category)
122
- @config.call(:category_count, category)
123
- end
124
-
125
- def features_count(feature, category)
126
- @config.call(:features_count, feature, category)
127
- end
128
-
129
- def inc_feature(feature, category)
130
- @config.call :inc_feature, feature, category
97
+ def method_missing(key, *values, &block)
98
+ if @config.respond_to?(key, values)
99
+ @config.call key, *values
100
+ else
101
+ @config.get(key)
102
+ end
131
103
  end
132
104
 
133
- def inc_category(category)
134
- @config.call :inc_category, category
135
- end
136
105
  end
137
106
  end
@@ -3,8 +3,9 @@ module FisherClassifier
3
3
 
4
4
  def initialize(block)
5
5
  @config = {
6
+ fisher_threshold: 0,
6
7
  weight: 1.0,
7
- ap: 0.5
8
+ assumed_prob: 0.5
8
9
  }
9
10
  @methods = {}
10
11
  instance_eval &block
@@ -30,5 +31,9 @@ module FisherClassifier
30
31
  end
31
32
  end
32
33
 
34
+ def respond_to_missing?(method_name, include_private = false)
35
+ @methods.has_key? method_name
36
+ end
37
+
33
38
  end
34
39
  end
@@ -40,7 +40,7 @@ module FisherClassifier
40
40
  categories.map do |category|
41
41
  {
42
42
  name: category,
43
- category_prob: category_prob(feature, category),
43
+ category_prob: category_prob(category, feature),
44
44
  feature_prob: feature_prob(feature, category),
45
45
  weighted_prob: weighted_prob(feature, category),
46
46
  freqsum: feature_freqsum(feature, category)
@@ -1,3 +1,3 @@
1
1
  module FisherClassifier
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fisher_classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew8xx8