fisher_classifier 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/fisher_classifier/classifier.rb +14 -45
- data/lib/fisher_classifier/config.rb +6 -1
- data/lib/fisher_classifier/meta.rb +1 -1
- data/lib/fisher_classifier/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZmMyNjg5M2FhZmM5YjBhNTRhNzQ1Y2U0ZGNlZTY1MjgyZTRlMjc0YQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MTQ4M2E0YjgwMzUwZGU2NGIwNjAwZGQwNzZhODdlNWYwYjUyZWFkMQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2M0NGI4Njg1MjdkZDRiZWJiYmJiYTJlNTE3YmY5YTM3MWEyNTQwMGVkOTlm
|
10
|
+
OGRiNDgzNTIyNjZiMWFkZGFkM2Q2ZDJkOTgyNDdlMjc5OGNkOTMxNWEyMTM4
|
11
|
+
YjNhMWIzYWIxMmM0NWEyZjFmNzVkNjczODcxMjQ0YTlkN2ZhNDE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTNlM2M5MzZlOWQyY2FmN2NjOTA0NzUzNTlhMTBhMDNhYWMwYjQwZGUyMjVl
|
14
|
+
OGZlM2Y1MWYyMzg5NTUyYjM2YWFhZTk4Zjg0MjViYWM5NGNhNGE1YWIyYjBk
|
15
|
+
OWE5MjFlZGY4YTY4YmExYzU5ZTIwNmYzMzQ5M2RkMmQzMTA3NTE=
|
@@ -17,7 +17,7 @@ module FisherClassifier
|
|
17
17
|
def classify(text)
|
18
18
|
features = get_features(text)
|
19
19
|
best = default_category
|
20
|
-
max =
|
20
|
+
max = fisher_threshold
|
21
21
|
|
22
22
|
categories.each do |category|
|
23
23
|
prob = fisher_prob(category, features)
|
@@ -41,16 +41,16 @@ module FisherClassifier
|
|
41
41
|
)
|
42
42
|
end
|
43
43
|
|
44
|
+
def fisher_factor(probs_multiply)
|
45
|
+
-2 * Math.log(probs_multiply)
|
46
|
+
end
|
47
|
+
|
44
48
|
def probs_multiply(features, category)
|
45
49
|
fprobs = features.map { |f| weighted_prob(f, category) }
|
46
50
|
probs_multiply = fprobs.inject(:*)
|
47
51
|
probs_multiply ||= 0
|
48
52
|
end
|
49
53
|
|
50
|
-
def fisher_factor(probs_multiply)
|
51
|
-
-2 * Math.log(probs_multiply)
|
52
|
-
end
|
53
|
-
|
54
54
|
def feature_prob(feature, category)
|
55
55
|
cc = category_count(category)
|
56
56
|
return cc if cc.zero?
|
@@ -59,10 +59,10 @@ module FisherClassifier
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def weighted_prob(feature, category)
|
62
|
-
current_prob = category_prob(
|
62
|
+
current_prob = category_prob(category, feature)
|
63
63
|
totals = feature_in_all_categories(feature)
|
64
64
|
|
65
|
-
(weight *
|
65
|
+
(weight * assumed_prob + totals * current_prob) / ( weight + totals).to_f
|
66
66
|
end
|
67
67
|
|
68
68
|
def feature_in_all_categories(feature)
|
@@ -70,7 +70,7 @@ module FisherClassifier
|
|
70
70
|
counts.inject(:+)
|
71
71
|
end
|
72
72
|
|
73
|
-
def category_prob(
|
73
|
+
def category_prob(category, feature)
|
74
74
|
fp = feature_prob(feature, category)
|
75
75
|
return fp if fp.zero?
|
76
76
|
|
@@ -94,44 +94,13 @@ module FisherClassifier
|
|
94
94
|
[sum, 1.0].min
|
95
95
|
end
|
96
96
|
|
97
|
-
def
|
98
|
-
@config.
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
def weight
|
106
|
-
@config.get(:weight)
|
107
|
-
end
|
108
|
-
|
109
|
-
def ap
|
110
|
-
@config.get(:ap)
|
111
|
-
end
|
112
|
-
|
113
|
-
def get_features(text)
|
114
|
-
@config.call(:get_features, text)
|
115
|
-
end
|
116
|
-
|
117
|
-
def categories
|
118
|
-
@config.call(:categories)
|
119
|
-
end
|
120
|
-
|
121
|
-
def category_count(category)
|
122
|
-
@config.call(:category_count, category)
|
123
|
-
end
|
124
|
-
|
125
|
-
def features_count(feature, category)
|
126
|
-
@config.call(:features_count, feature, category)
|
127
|
-
end
|
128
|
-
|
129
|
-
def inc_feature(feature, category)
|
130
|
-
@config.call :inc_feature, feature, category
|
97
|
+
def method_missing(key, *values, &block)
|
98
|
+
if @config.respond_to?(key, values)
|
99
|
+
@config.call key, *values
|
100
|
+
else
|
101
|
+
@config.get(key)
|
102
|
+
end
|
131
103
|
end
|
132
104
|
|
133
|
-
def inc_category(category)
|
134
|
-
@config.call :inc_category, category
|
135
|
-
end
|
136
105
|
end
|
137
106
|
end
|
@@ -3,8 +3,9 @@ module FisherClassifier
|
|
3
3
|
|
4
4
|
def initialize(block)
|
5
5
|
@config = {
|
6
|
+
fisher_threshold: 0,
|
6
7
|
weight: 1.0,
|
7
|
-
|
8
|
+
assumed_prob: 0.5
|
8
9
|
}
|
9
10
|
@methods = {}
|
10
11
|
instance_eval &block
|
@@ -30,5 +31,9 @@ module FisherClassifier
|
|
30
31
|
end
|
31
32
|
end
|
32
33
|
|
34
|
+
def respond_to_missing?(method_name, include_private = false)
|
35
|
+
@methods.has_key? method_name
|
36
|
+
end
|
37
|
+
|
33
38
|
end
|
34
39
|
end
|
@@ -40,7 +40,7 @@ module FisherClassifier
|
|
40
40
|
categories.map do |category|
|
41
41
|
{
|
42
42
|
name: category,
|
43
|
-
category_prob: category_prob(
|
43
|
+
category_prob: category_prob(category, feature),
|
44
44
|
feature_prob: feature_prob(feature, category),
|
45
45
|
weighted_prob: weighted_prob(feature, category),
|
46
46
|
freqsum: feature_freqsum(feature, category)
|