bae 0.0.6-java → 0.0.7-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f7b557eff0f63a4dfa9007451f932293eb80ece
4
- data.tar.gz: 33f009d0119de54ae777e5f76537ca2661dcd047
3
+ metadata.gz: 71c761f2619746bfc4dd287a5afa5443a7bfe037
4
+ data.tar.gz: 886257c2c8987fd8e95edbb63105ef87d7960de5
5
5
  SHA512:
6
- metadata.gz: 741fb65cac66f3f47a536e20618dd4dd75880079616652e57651f139c5ad384ca2e508c84d86bf813176e8b39ef590810a8daa33a0dda56b67ffd490fb7a530d
7
- data.tar.gz: bc8e0e209dd1b88995e39cbb5d6d18db1437b519599b71c1f6fef06aa22af770da3f6fe7d45c897682056755a5d97023d3c073b3d9329e74ab6407b157173d66
6
+ metadata.gz: e02048771022daa4b61097ae500831a671f1a6ec3d8e9e48f235efd7ff9902be31678e503b4bd31b6cd0a0526d61c2868f1af39cb619c8c9fb120517242928cc
7
+ data.tar.gz: c2c7073db8b7afeea466a5aa9db9f157101a9a8e9b38d87787281c204990eb3c4b3f75b64a9ecb2fd7a8dc5629bd807d101b0ea702c38d735ba52e2595bdc822
data/README.md CHANGED
@@ -28,6 +28,9 @@ You can refer to ["naivebayes"](https://github.com/id774/naivebayes) gem for mor
28
28
  classifier = ::Bae::Classifier.new
29
29
  classifier.train("positive", {"aaa" => 0, "bbb" => 1})
30
30
  classifier.train("negative", {"ccc" => 2, "ddd" => 3})
31
+
32
+ classifier.finish_training!
33
+
31
34
  classifier.classify({"aaa" => 1, "bbb" => 1})
32
35
 
33
36
  #=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
@@ -39,6 +42,9 @@ classifier = ::Bae::Classifier.new
39
42
  classifier.train("positive", "aaa aaa bbb");
40
43
  classifier.train("negative", "ccc ccc ddd ddd");
41
44
  classifier.train("neutral", "eee eee eee fff fff fff");
45
+
46
+ classifier.finish_training!
47
+
42
48
  classifier.classify("aaa bbb")
43
49
 
44
50
  #=> {"positive"=>0.8962655601659751, "negative"=>0.0663900414937759, "neutral"=>0.037344398340248955}
@@ -15,5 +15,9 @@ module Bae
15
15
  internal_classifier.classify(::Java::Bae::Document.new(feature))
16
16
  end
17
17
 
18
+ def finish_training!
19
+ internal_classifier.calculateInitialLikelihoods()
20
+ end
21
+
18
22
  end
19
23
  end
data/lib/bae/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Bae
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -7,6 +7,9 @@ describe ::Bae::Classifier do
7
7
  it "can classify from ruby to java with a hash document" do
8
8
  subject.train("positive", {"aaa" => 0, "bbb" => 1})
9
9
  subject.train("negative", {"ccc" => 2, "ddd" => 3})
10
+
11
+ subject.finish_training!
12
+
10
13
  results = subject.classify({"aaa" => 1, "bbb" => 1})
11
14
 
12
15
  expect(results["positive"]).to be_within(0.001).of(0.94117)
@@ -17,6 +20,9 @@ describe ::Bae::Classifier do
17
20
  subject.train("positive", "aaa aaa bbb");
18
21
  subject.train("negative", "ccc ccc ddd ddd");
19
22
  subject.train("neutral", "eee eee eee fff fff fff");
23
+
24
+ subject.finish_training!
25
+
20
26
  results = subject.classify("aaa bbb")
21
27
 
22
28
  expect(results["positive"]).to be_within(0.001).of(0.89626)
@@ -8,12 +8,16 @@ public class NaiveBayesClassifier {
8
8
  private FrequencyTable frequencyTable;
9
9
  private Map<String, Long> wordTable;
10
10
  private Map<String, Long> instanceCountOf;
11
+ private Map<String, Double> initialLikelihoodOf;
12
+ Map<String, Double> classPriorOf;
11
13
  private double totalCount = 0;
12
14
 
13
15
  public NaiveBayesClassifier() {
14
16
  this.frequencyTable = new FrequencyTable();
15
17
  this.wordTable = new HashMap<>();
16
18
  this.instanceCountOf = new HashMap<>();
19
+ this.initialLikelihoodOf = new HashMap<>();
20
+ this.classPriorOf = new HashMap<>();
17
21
  }
18
22
 
19
23
  public void train(String label, Document document) {
@@ -37,12 +41,23 @@ public class NaiveBayesClassifier {
37
41
  updateIntegerCountBy(this.instanceCountOf, label, 1);
38
42
  }
39
43
 
40
- public Map<String, Double> classify(Document document) {
41
- Map<String, Double> classPriorOf = new HashMap<>();
42
- Map<String, Double> likelihoodOf = new HashMap<>();
43
- Map<String, Double> classPosteriorOf = new HashMap<>();
44
- Map<String, Long> frequencyMap = document.getFrequencyMap();
45
- double evidence = 0;
44
+ public void calculateInitialLikelihoods() {
45
+ // Update likelihood counts
46
+ for(String label : this.frequencyTable.getLabels()) {
47
+ // Set initial likelihood
48
+ initialLikelihoodOf.put(label, 1d);
49
+
50
+ // Calculate likelihoods
51
+ for (String word : this.wordTable.keySet()) {
52
+ double laplaceWordLikelihood =
53
+ (this.frequencyTable.get(label, word) + 1d) /
54
+ (this.instanceCountOf.get(label) + this.wordTable.size());
55
+
56
+ // Update likelihood
57
+ double likelihood = initialLikelihoodOf.get(label);
58
+ initialLikelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
59
+ }
60
+ }
46
61
 
47
62
  // Update the prior
48
63
  for(Map.Entry<String, Long> entry : this.instanceCountOf.entrySet()) {
@@ -50,36 +65,39 @@ public class NaiveBayesClassifier {
50
65
  double frequency = entry.getValue();
51
66
 
52
67
  // Update instance count
53
- classPriorOf.put(label, (frequency / this.totalCount));
68
+ this.classPriorOf.put(label, (frequency / this.totalCount));
54
69
  }
70
+ }
71
+
72
+ public Map<String, Double> classify(Document document) {
73
+ Map<String, Double> likelihoodOf = new HashMap<>();
74
+ Map<String, Double> classPosteriorOf = new HashMap<>();
75
+ Map<String, Long> featureFrequencyMap = document.getFrequencyMap();
76
+ double evidence = 0;
55
77
 
56
78
  // Update likelihood counts
57
79
  for(String label : this.frequencyTable.getLabels()) {
58
80
  // Set initial likelihood
59
- likelihoodOf.put(label, 1d);
81
+ likelihoodOf.put(label, this.initialLikelihoodOf.get(label));
60
82
 
61
- // Calculate likelihoods
62
- for(String word : wordTable.keySet()) {
83
+ // Calculate actual likelihoods
84
+ for(String word : featureFrequencyMap.keySet()) {
63
85
  double laplaceWordLikelihood =
64
86
  (this.frequencyTable.get(label, word) + 1d) /
65
87
  (this.instanceCountOf.get(label) + this.wordTable.size());
66
88
 
67
- // Update likelihood
89
+ // Update likelihood for words present in the features: swap the precomputed (1 - p) factor for p
68
90
  double likelihood = likelihoodOf.get(label);
69
- if(frequencyMap.containsKey(word)) {
70
- likelihoodOf.put(label, likelihood * laplaceWordLikelihood);
71
- } else {
72
- likelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
91
+ if(featureFrequencyMap.containsKey(word)) {
92
+ likelihoodOf.put(label, (likelihood * laplaceWordLikelihood) / (1d - laplaceWordLikelihood));
73
93
  }
74
94
  }
75
95
 
76
96
  // Default class posterior of label to 1.0
77
- if(!classPosteriorOf.containsKey(label)) {
78
- classPosteriorOf.put(label, 1d);
79
- }
97
+ classPosteriorOf.put(label, 1d);
80
98
 
81
99
  // Update class posterior
82
- double classPosterior = classPriorOf.get(label) * likelihoodOf.get(label);
100
+ double classPosterior = this.classPriorOf.get(label) * likelihoodOf.get(label);
83
101
  classPosteriorOf.put(label, classPosterior);
84
102
  evidence += classPosterior;
85
103
  }
data/target/bae.jar CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: java
6
6
  authors:
7
7
  - Garrett Thornburg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement