bae 0.0.6-java → 0.0.7-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/bae/classifier.rb +4 -0
- data/lib/bae/version.rb +1 -1
- data/spec/lib/bae/classifier_spec.rb +6 -0
- data/src/main/java/bae/NaiveBayesClassifier.java +37 -19
- data/target/bae.jar +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71c761f2619746bfc4dd287a5afa5443a7bfe037
|
4
|
+
data.tar.gz: 886257c2c8987fd8e95edbb63105ef87d7960de5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e02048771022daa4b61097ae500831a671f1a6ec3d8e9e48f235efd7ff9902be31678e503b4bd31b6cd0a0526d61c2868f1af39cb619c8c9fb120517242928cc
|
7
|
+
data.tar.gz: c2c7073db8b7afeea466a5aa9db9f157101a9a8e9b38d87787281c204990eb3c4b3f75b64a9ecb2fd7a8dc5629bd807d101b0ea702c38d735ba52e2595bdc822
|
data/README.md
CHANGED
@@ -28,6 +28,9 @@ You can refer to ["naivebayes"](https://github.com/id774/naivebayes) gem for mor
|
|
28
28
|
classifier = ::Bae::Classifier.new
|
29
29
|
classifier.train("positive", {"aaa" => 0, "bbb" => 1})
|
30
30
|
classifier.train("negative", {"ccc" => 2, "ddd" => 3})
|
31
|
+
|
32
|
+
classifier.finish_training!
|
33
|
+
|
31
34
|
classifier.classify({"aaa" => 1, "bbb" => 1})
|
32
35
|
|
33
36
|
#=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
|
@@ -39,6 +42,9 @@ classifier = ::Bae::Classifier.new
|
|
39
42
|
classifier.train("positive", "aaa aaa bbb");
|
40
43
|
classifier.train("negative", "ccc ccc ddd ddd");
|
41
44
|
classifier.train("neutral", "eee eee eee fff fff fff");
|
45
|
+
|
46
|
+
classifier.finish_training!
|
47
|
+
|
42
48
|
classifier.classify("aaa bbb")
|
43
49
|
|
44
50
|
#=> {"positive"=>0.8962655601659751, "negative"=>0.0663900414937759, "neutral"=>0.037344398340248955}
|
data/lib/bae/classifier.rb
CHANGED
data/lib/bae/version.rb
CHANGED
data/spec/lib/bae/classifier_spec.rb
CHANGED
@@ -7,6 +7,9 @@ describe ::Bae::Classifier do
|
|
7
7
|
it "can classify from ruby to java with a hash document" do
|
8
8
|
subject.train("positive", {"aaa" => 0, "bbb" => 1})
|
9
9
|
subject.train("negative", {"ccc" => 2, "ddd" => 3})
|
10
|
+
|
11
|
+
subject.finish_training!
|
12
|
+
|
10
13
|
results = subject.classify({"aaa" => 1, "bbb" => 1})
|
11
14
|
|
12
15
|
expect(results["positive"]).to be_within(0.001).of(0.94117)
|
@@ -17,6 +20,9 @@ describe ::Bae::Classifier do
|
|
17
20
|
subject.train("positive", "aaa aaa bbb");
|
18
21
|
subject.train("negative", "ccc ccc ddd ddd");
|
19
22
|
subject.train("neutral", "eee eee eee fff fff fff");
|
23
|
+
|
24
|
+
subject.finish_training!
|
25
|
+
|
20
26
|
results = subject.classify("aaa bbb")
|
21
27
|
|
22
28
|
expect(results["positive"]).to be_within(0.001).of(0.89626)
|
data/src/main/java/bae/NaiveBayesClassifier.java
CHANGED
@@ -8,12 +8,16 @@ public class NaiveBayesClassifier {
|
|
8
8
|
private FrequencyTable frequencyTable;
|
9
9
|
private Map<String, Long> wordTable;
|
10
10
|
private Map<String, Long> instanceCountOf;
|
11
|
+
private Map<String, Double> initialLikelihoodOf;
|
12
|
+
Map<String, Double> classPriorOf;
|
11
13
|
private double totalCount = 0;
|
12
14
|
|
13
15
|
public NaiveBayesClassifier() {
|
14
16
|
this.frequencyTable = new FrequencyTable();
|
15
17
|
this.wordTable = new HashMap<>();
|
16
18
|
this.instanceCountOf = new HashMap<>();
|
19
|
+
this.initialLikelihoodOf = new HashMap<>();
|
20
|
+
this.classPriorOf = new HashMap<>();
|
17
21
|
}
|
18
22
|
|
19
23
|
public void train(String label, Document document) {
|
@@ -37,12 +41,23 @@ public class NaiveBayesClassifier {
|
|
37
41
|
updateIntegerCountBy(this.instanceCountOf, label, 1);
|
38
42
|
}
|
39
43
|
|
40
|
-
public
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
44
|
+
public void calculateInitialLikelihoods() {
|
45
|
+
// Update likelihood counts
|
46
|
+
for(String label : this.frequencyTable.getLabels()) {
|
47
|
+
// Set initial likelihood
|
48
|
+
initialLikelihoodOf.put(label, 1d);
|
49
|
+
|
50
|
+
// Calculate likelihoods
|
51
|
+
for (String word : this.wordTable.keySet()) {
|
52
|
+
double laplaceWordLikelihood =
|
53
|
+
(this.frequencyTable.get(label, word) + 1d) /
|
54
|
+
(this.instanceCountOf.get(label) + this.wordTable.size());
|
55
|
+
|
56
|
+
// Update likelihood
|
57
|
+
double likelihood = initialLikelihoodOf.get(label);
|
58
|
+
initialLikelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
|
59
|
+
}
|
60
|
+
}
|
46
61
|
|
47
62
|
// Update the prior
|
48
63
|
for(Map.Entry<String, Long> entry : this.instanceCountOf.entrySet()) {
|
@@ -50,36 +65,39 @@ public class NaiveBayesClassifier {
|
|
50
65
|
double frequency = entry.getValue();
|
51
66
|
|
52
67
|
// Update instance count
|
53
|
-
classPriorOf.put(label, (frequency / this.totalCount));
|
68
|
+
this.classPriorOf.put(label, (frequency / this.totalCount));
|
54
69
|
}
|
70
|
+
}
|
71
|
+
|
72
|
+
public Map<String, Double> classify(Document document) {
|
73
|
+
Map<String, Double> likelihoodOf = new HashMap<>();
|
74
|
+
Map<String, Double> classPosteriorOf = new HashMap<>();
|
75
|
+
Map<String, Long> featureFrequencyMap = document.getFrequencyMap();
|
76
|
+
double evidence = 0;
|
55
77
|
|
56
78
|
// Update likelihood counts
|
57
79
|
for(String label : this.frequencyTable.getLabels()) {
|
58
80
|
// Set initial likelihood
|
59
|
-
likelihoodOf.put(label,
|
81
|
+
likelihoodOf.put(label, this.initialLikelihoodOf.get(label));
|
60
82
|
|
61
|
-
// Calculate likelihoods
|
62
|
-
for(String word :
|
83
|
+
// Calculate actual likelihoods
|
84
|
+
for(String word : featureFrequencyMap.keySet()) {
|
63
85
|
double laplaceWordLikelihood =
|
64
86
|
(this.frequencyTable.get(label, word) + 1d) /
|
65
87
|
(this.instanceCountOf.get(label) + this.wordTable.size());
|
66
88
|
|
67
|
-
// Update likelihood
|
89
|
+
// Update likelihood for words present in the features: swap the (1 - p) factor from the initial likelihood for p
|
68
90
|
double likelihood = likelihoodOf.get(label);
|
69
|
-
if(
|
70
|
-
likelihoodOf.put(label, likelihood * laplaceWordLikelihood);
|
71
|
-
} else {
|
72
|
-
likelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
|
91
|
+
if(featureFrequencyMap.containsKey(word)) {
|
92
|
+
likelihoodOf.put(label, (likelihood * laplaceWordLikelihood) / (1d - laplaceWordLikelihood));
|
73
93
|
}
|
74
94
|
}
|
75
95
|
|
76
96
|
// Default class posterior of label to 1.0
|
77
|
-
|
78
|
-
classPosteriorOf.put(label, 1d);
|
79
|
-
}
|
97
|
+
classPosteriorOf.put(label, 1d);
|
80
98
|
|
81
99
|
// Update class posterior
|
82
|
-
double classPosterior = classPriorOf.get(label) * likelihoodOf.get(label);
|
100
|
+
double classPosterior = this.classPriorOf.get(label) * likelihoodOf.get(label);
|
83
101
|
classPosteriorOf.put(label, classPosterior);
|
84
102
|
evidence += classPosterior;
|
85
103
|
}
|
data/target/bae.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Garrett Thornburg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|