bae 0.0.6-java → 0.0.7-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/bae/classifier.rb +4 -0
- data/lib/bae/version.rb +1 -1
- data/spec/lib/bae/classifier_spec.rb +6 -0
- data/src/main/java/bae/NaiveBayesClassifier.java +37 -19
- data/target/bae.jar +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71c761f2619746bfc4dd287a5afa5443a7bfe037
|
4
|
+
data.tar.gz: 886257c2c8987fd8e95edbb63105ef87d7960de5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e02048771022daa4b61097ae500831a671f1a6ec3d8e9e48f235efd7ff9902be31678e503b4bd31b6cd0a0526d61c2868f1af39cb619c8c9fb120517242928cc
|
7
|
+
data.tar.gz: c2c7073db8b7afeea466a5aa9db9f157101a9a8e9b38d87787281c204990eb3c4b3f75b64a9ecb2fd7a8dc5629bd807d101b0ea702c38d735ba52e2595bdc822
|
data/README.md
CHANGED
@@ -28,6 +28,9 @@ You can refer to ["naivebayes"](https://github.com/id774/naivebayes) gem for mor
|
|
28
28
|
classifier = ::Bae::Classifier.new
|
29
29
|
classifier.train("positive", {"aaa" => 0, "bbb" => 1})
|
30
30
|
classifier.train("negative", {"ccc" => 2, "ddd" => 3})
|
31
|
+
|
32
|
+
classifier.finish_training!
|
33
|
+
|
31
34
|
classifier.classify({"aaa" => 1, "bbb" => 1})
|
32
35
|
|
33
36
|
#=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
|
@@ -39,6 +42,9 @@ classifier = ::Bae::Classifier.new
|
|
39
42
|
classifier.train("positive", "aaa aaa bbb");
|
40
43
|
classifier.train("negative", "ccc ccc ddd ddd");
|
41
44
|
classifier.train("neutral", "eee eee eee fff fff fff");
|
45
|
+
|
46
|
+
classifier.finish_training!
|
47
|
+
|
42
48
|
classifier.classify("aaa bbb")
|
43
49
|
|
44
50
|
#=> {"positive"=>0.8962655601659751, "negative"=>0.0663900414937759, "neutral"=>0.037344398340248955}
|
data/lib/bae/classifier.rb
CHANGED
data/lib/bae/version.rb
CHANGED
data/spec/lib/bae/classifier_spec.rb
CHANGED
@@ -7,6 +7,9 @@ describe ::Bae::Classifier do
|
|
7
7
|
it "can classify from ruby to java with a hash document" do
|
8
8
|
subject.train("positive", {"aaa" => 0, "bbb" => 1})
|
9
9
|
subject.train("negative", {"ccc" => 2, "ddd" => 3})
|
10
|
+
|
11
|
+
subject.finish_training!
|
12
|
+
|
10
13
|
results = subject.classify({"aaa" => 1, "bbb" => 1})
|
11
14
|
|
12
15
|
expect(results["positive"]).to be_within(0.001).of(0.94117)
|
@@ -17,6 +20,9 @@ describe ::Bae::Classifier do
|
|
17
20
|
subject.train("positive", "aaa aaa bbb");
|
18
21
|
subject.train("negative", "ccc ccc ddd ddd");
|
19
22
|
subject.train("neutral", "eee eee eee fff fff fff");
|
23
|
+
|
24
|
+
subject.finish_training!
|
25
|
+
|
20
26
|
results = subject.classify("aaa bbb")
|
21
27
|
|
22
28
|
expect(results["positive"]).to be_within(0.001).of(0.89626)
|
data/src/main/java/bae/NaiveBayesClassifier.java
CHANGED
@@ -8,12 +8,16 @@ public class NaiveBayesClassifier {
|
|
8
8
|
private FrequencyTable frequencyTable;
|
9
9
|
private Map<String, Long> wordTable;
|
10
10
|
private Map<String, Long> instanceCountOf;
|
11
|
+
private Map<String, Double> initialLikelihoodOf;
|
12
|
+
Map<String, Double> classPriorOf;
|
11
13
|
private double totalCount = 0;
|
12
14
|
|
13
15
|
public NaiveBayesClassifier() {
|
14
16
|
this.frequencyTable = new FrequencyTable();
|
15
17
|
this.wordTable = new HashMap<>();
|
16
18
|
this.instanceCountOf = new HashMap<>();
|
19
|
+
this.initialLikelihoodOf = new HashMap<>();
|
20
|
+
this.classPriorOf = new HashMap<>();
|
17
21
|
}
|
18
22
|
|
19
23
|
public void train(String label, Document document) {
|
@@ -37,12 +41,23 @@ public class NaiveBayesClassifier {
|
|
37
41
|
updateIntegerCountBy(this.instanceCountOf, label, 1);
|
38
42
|
}
|
39
43
|
|
40
|
-
public
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
44
|
+
public void calculateInitialLikelihoods() {
|
45
|
+
// Update likelihood counts
|
46
|
+
for(String label : this.frequencyTable.getLabels()) {
|
47
|
+
// Set initial likelihood
|
48
|
+
initialLikelihoodOf.put(label, 1d);
|
49
|
+
|
50
|
+
// Calculate likelihoods
|
51
|
+
for (String word : this.wordTable.keySet()) {
|
52
|
+
double laplaceWordLikelihood =
|
53
|
+
(this.frequencyTable.get(label, word) + 1d) /
|
54
|
+
(this.instanceCountOf.get(label) + this.wordTable.size());
|
55
|
+
|
56
|
+
// Update likelihood
|
57
|
+
double likelihood = initialLikelihoodOf.get(label);
|
58
|
+
initialLikelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
|
59
|
+
}
|
60
|
+
}
|
46
61
|
|
47
62
|
// Update the prior
|
48
63
|
for(Map.Entry<String, Long> entry : this.instanceCountOf.entrySet()) {
|
@@ -50,36 +65,39 @@ public class NaiveBayesClassifier {
|
|
50
65
|
double frequency = entry.getValue();
|
51
66
|
|
52
67
|
// Update instance count
|
53
|
-
classPriorOf.put(label, (frequency / this.totalCount));
|
68
|
+
this.classPriorOf.put(label, (frequency / this.totalCount));
|
54
69
|
}
|
70
|
+
}
|
71
|
+
|
72
|
+
public Map<String, Double> classify(Document document) {
|
73
|
+
Map<String, Double> likelihoodOf = new HashMap<>();
|
74
|
+
Map<String, Double> classPosteriorOf = new HashMap<>();
|
75
|
+
Map<String, Long> featureFrequencyMap = document.getFrequencyMap();
|
76
|
+
double evidence = 0;
|
55
77
|
|
56
78
|
// Update likelihood counts
|
57
79
|
for(String label : this.frequencyTable.getLabels()) {
|
58
80
|
// Set initial likelihood
|
59
|
-
likelihoodOf.put(label,
|
81
|
+
likelihoodOf.put(label, this.initialLikelihoodOf.get(label));
|
60
82
|
|
61
|
-
// Calculate likelihoods
|
62
|
-
for(String word :
|
83
|
+
// Calculate actual likelihoods likelihoods
|
84
|
+
for(String word : featureFrequencyMap.keySet()) {
|
63
85
|
double laplaceWordLikelihood =
|
64
86
|
(this.frequencyTable.get(label, word) + 1d) /
|
65
87
|
(this.instanceCountOf.get(label) + this.wordTable.size());
|
66
88
|
|
67
|
-
// Update likelihood
|
89
|
+
// Update likelihood for words not in features
|
68
90
|
double likelihood = likelihoodOf.get(label);
|
69
|
-
if(
|
70
|
-
likelihoodOf.put(label, likelihood * laplaceWordLikelihood);
|
71
|
-
} else {
|
72
|
-
likelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
|
91
|
+
if(featureFrequencyMap.containsKey(word)) {
|
92
|
+
likelihoodOf.put(label, (likelihood * laplaceWordLikelihood) / (1d - laplaceWordLikelihood));
|
73
93
|
}
|
74
94
|
}
|
75
95
|
|
76
96
|
// Default class posterior of label to 1.0
|
77
|
-
|
78
|
-
classPosteriorOf.put(label, 1d);
|
79
|
-
}
|
97
|
+
classPosteriorOf.put(label, 1d);
|
80
98
|
|
81
99
|
// Update class posterior
|
82
|
-
double classPosterior = classPriorOf.get(label) * likelihoodOf.get(label);
|
100
|
+
double classPosterior = this.classPriorOf.get(label) * likelihoodOf.get(label);
|
83
101
|
classPosteriorOf.put(label, classPosterior);
|
84
102
|
evidence += classPosterior;
|
85
103
|
}
|
data/target/bae.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Garrett Thornburg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|