fisher_classifier 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/fisher_classifier/classifier.rb +14 -45
- data/lib/fisher_classifier/config.rb +6 -1
- data/lib/fisher_classifier/meta.rb +1 -1
- data/lib/fisher_classifier/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZmMyNjg5M2FhZmM5YjBhNTRhNzQ1Y2U0ZGNlZTY1MjgyZTRlMjc0YQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MTQ4M2E0YjgwMzUwZGU2NGIwNjAwZGQwNzZhODdlNWYwYjUyZWFkMQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2M0NGI4Njg1MjdkZDRiZWJiYmJiYTJlNTE3YmY5YTM3MWEyNTQwMGVkOTlm
|
10
|
+
OGRiNDgzNTIyNjZiMWFkZGFkM2Q2ZDJkOTgyNDdlMjc5OGNkOTMxNWEyMTM4
|
11
|
+
YjNhMWIzYWIxMmM0NWEyZjFmNzVkNjczODcxMjQ0YTlkN2ZhNDE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTNlM2M5MzZlOWQyY2FmN2NjOTA0NzUzNTlhMTBhMDNhYWMwYjQwZGUyMjVl
|
14
|
+
OGZlM2Y1MWYyMzg5NTUyYjM2YWFhZTk4Zjg0MjViYWM5NGNhNGE1YWIyYjBk
|
15
|
+
OWE5MjFlZGY4YTY4YmExYzU5ZTIwNmYzMzQ5M2RkMmQzMTA3NTE=
|
@@ -17,7 +17,7 @@ module FisherClassifier
|
|
17
17
|
def classify(text)
|
18
18
|
features = get_features(text)
|
19
19
|
best = default_category
|
20
|
-
max =
|
20
|
+
max = fisher_threshold
|
21
21
|
|
22
22
|
categories.each do |category|
|
23
23
|
prob = fisher_prob(category, features)
|
@@ -41,16 +41,16 @@ module FisherClassifier
|
|
41
41
|
)
|
42
42
|
end
|
43
43
|
|
44
|
+
def fisher_factor(probs_multiply)
|
45
|
+
-2 * Math.log(probs_multiply)
|
46
|
+
end
|
47
|
+
|
44
48
|
def probs_multiply(features, category)
|
45
49
|
fprobs = features.map { |f| weighted_prob(f, category) }
|
46
50
|
probs_multiply = fprobs.inject(:*)
|
47
51
|
probs_multiply ||= 0
|
48
52
|
end
|
49
53
|
|
50
|
-
def fisher_factor(probs_multiply)
|
51
|
-
-2 * Math.log(probs_multiply)
|
52
|
-
end
|
53
|
-
|
54
54
|
def feature_prob(feature, category)
|
55
55
|
cc = category_count(category)
|
56
56
|
return cc if cc.zero?
|
@@ -59,10 +59,10 @@ module FisherClassifier
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def weighted_prob(feature, category)
|
62
|
-
current_prob = category_prob(
|
62
|
+
current_prob = category_prob(category, feature)
|
63
63
|
totals = feature_in_all_categories(feature)
|
64
64
|
|
65
|
-
(weight *
|
65
|
+
(weight * assumed_prob + totals * current_prob) / ( weight + totals).to_f
|
66
66
|
end
|
67
67
|
|
68
68
|
def feature_in_all_categories(feature)
|
@@ -70,7 +70,7 @@ module FisherClassifier
|
|
70
70
|
counts.inject(:+)
|
71
71
|
end
|
72
72
|
|
73
|
-
def category_prob(
|
73
|
+
def category_prob(category, feature)
|
74
74
|
fp = feature_prob(feature, category)
|
75
75
|
return fp if fp.zero?
|
76
76
|
|
@@ -94,44 +94,13 @@ module FisherClassifier
|
|
94
94
|
[sum, 1.0].min
|
95
95
|
end
|
96
96
|
|
97
|
-
def
|
98
|
-
@config.
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
def weight
|
106
|
-
@config.get(:weight)
|
107
|
-
end
|
108
|
-
|
109
|
-
def ap
|
110
|
-
@config.get(:ap)
|
111
|
-
end
|
112
|
-
|
113
|
-
def get_features(text)
|
114
|
-
@config.call(:get_features, text)
|
115
|
-
end
|
116
|
-
|
117
|
-
def categories
|
118
|
-
@config.call(:categories)
|
119
|
-
end
|
120
|
-
|
121
|
-
def category_count(category)
|
122
|
-
@config.call(:category_count, category)
|
123
|
-
end
|
124
|
-
|
125
|
-
def features_count(feature, category)
|
126
|
-
@config.call(:features_count, feature, category)
|
127
|
-
end
|
128
|
-
|
129
|
-
def inc_feature(feature, category)
|
130
|
-
@config.call :inc_feature, feature, category
|
97
|
+
def method_missing(key, *values, &block)
|
98
|
+
if @config.respond_to?(key, values)
|
99
|
+
@config.call key, *values
|
100
|
+
else
|
101
|
+
@config.get(key)
|
102
|
+
end
|
131
103
|
end
|
132
104
|
|
133
|
-
def inc_category(category)
|
134
|
-
@config.call :inc_category, category
|
135
|
-
end
|
136
105
|
end
|
137
106
|
end
|
@@ -3,8 +3,9 @@ module FisherClassifier
|
|
3
3
|
|
4
4
|
def initialize(block)
|
5
5
|
@config = {
|
6
|
+
fisher_threshold: 0,
|
6
7
|
weight: 1.0,
|
7
|
-
|
8
|
+
assumed_prob: 0.5
|
8
9
|
}
|
9
10
|
@methods = {}
|
10
11
|
instance_eval &block
|
@@ -30,5 +31,9 @@ module FisherClassifier
|
|
30
31
|
end
|
31
32
|
end
|
32
33
|
|
34
|
+
def respond_to_missing?(method_name, include_private = false)
|
35
|
+
@methods.has_key? method_name
|
36
|
+
end
|
37
|
+
|
33
38
|
end
|
34
39
|
end
|
@@ -40,7 +40,7 @@ module FisherClassifier
|
|
40
40
|
categories.map do |category|
|
41
41
|
{
|
42
42
|
name: category,
|
43
|
-
category_prob: category_prob(
|
43
|
+
category_prob: category_prob(category, feature),
|
44
44
|
feature_prob: feature_prob(feature, category),
|
45
45
|
weighted_prob: weighted_prob(feature, category),
|
46
46
|
freqsum: feature_freqsum(feature, category)
|