bae 0.0.6-java → 0.0.7-java

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f7b557eff0f63a4dfa9007451f932293eb80ece
4
- data.tar.gz: 33f009d0119de54ae777e5f76537ca2661dcd047
3
+ metadata.gz: 71c761f2619746bfc4dd287a5afa5443a7bfe037
4
+ data.tar.gz: 886257c2c8987fd8e95edbb63105ef87d7960de5
5
5
  SHA512:
6
- metadata.gz: 741fb65cac66f3f47a536e20618dd4dd75880079616652e57651f139c5ad384ca2e508c84d86bf813176e8b39ef590810a8daa33a0dda56b67ffd490fb7a530d
7
- data.tar.gz: bc8e0e209dd1b88995e39cbb5d6d18db1437b519599b71c1f6fef06aa22af770da3f6fe7d45c897682056755a5d97023d3c073b3d9329e74ab6407b157173d66
6
+ metadata.gz: e02048771022daa4b61097ae500831a671f1a6ec3d8e9e48f235efd7ff9902be31678e503b4bd31b6cd0a0526d61c2868f1af39cb619c8c9fb120517242928cc
7
+ data.tar.gz: c2c7073db8b7afeea466a5aa9db9f157101a9a8e9b38d87787281c204990eb3c4b3f75b64a9ecb2fd7a8dc5629bd807d101b0ea702c38d735ba52e2595bdc822
data/README.md CHANGED
@@ -28,6 +28,9 @@ You can refer to ["naivebayes"](https://github.com/id774/naivebayes) gem for mor
28
28
  classifier = ::Bae::Classifier.new
29
29
  classifier.train("positive", {"aaa" => 0, "bbb" => 1})
30
30
  classifier.train("negative", {"ccc" => 2, "ddd" => 3})
31
+
32
+ classifier.finish_training!
33
+
31
34
  classifier.classify({"aaa" => 1, "bbb" => 1})
32
35
 
33
36
  #=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
@@ -39,6 +42,9 @@ classifier = ::Bae::Classifier.new
39
42
  classifier.train("positive", "aaa aaa bbb");
40
43
  classifier.train("negative", "ccc ccc ddd ddd");
41
44
  classifier.train("neutral", "eee eee eee fff fff fff");
45
+
46
+ classifier.finish_training!
47
+
42
48
  classifier.classify("aaa bbb")
43
49
 
44
50
  #=> {"positive"=>0.8962655601659751, "negative"=>0.0663900414937759, "neutral"=>0.037344398340248955}
@@ -15,5 +15,9 @@ module Bae
15
15
  internal_classifier.classify(::Java::Bae::Document.new(feature))
16
16
  end
17
17
 
18
+ def finish_training!
19
+ internal_classifier.calculateInitialLikelihoods()
20
+ end
21
+
18
22
  end
19
23
  end
data/lib/bae/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Bae
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -7,6 +7,9 @@ describe ::Bae::Classifier do
7
7
  it "can classify from ruby to java with a hash document" do
8
8
  subject.train("positive", {"aaa" => 0, "bbb" => 1})
9
9
  subject.train("negative", {"ccc" => 2, "ddd" => 3})
10
+
11
+ subject.finish_training!
12
+
10
13
  results = subject.classify({"aaa" => 1, "bbb" => 1})
11
14
 
12
15
  expect(results["positive"]).to be_within(0.001).of(0.94117)
@@ -17,6 +20,9 @@ describe ::Bae::Classifier do
17
20
  subject.train("positive", "aaa aaa bbb");
18
21
  subject.train("negative", "ccc ccc ddd ddd");
19
22
  subject.train("neutral", "eee eee eee fff fff fff");
23
+
24
+ subject.finish_training!
25
+
20
26
  results = subject.classify("aaa bbb")
21
27
 
22
28
  expect(results["positive"]).to be_within(0.001).of(0.89626)
@@ -8,12 +8,16 @@ public class NaiveBayesClassifier {
8
8
  private FrequencyTable frequencyTable;
9
9
  private Map<String, Long> wordTable;
10
10
  private Map<String, Long> instanceCountOf;
11
+ private Map<String, Double> initialLikelihoodOf;
12
+ Map<String, Double> classPriorOf;
11
13
  private double totalCount = 0;
12
14
 
13
15
  public NaiveBayesClassifier() {
14
16
  this.frequencyTable = new FrequencyTable();
15
17
  this.wordTable = new HashMap<>();
16
18
  this.instanceCountOf = new HashMap<>();
19
+ this.initialLikelihoodOf = new HashMap<>();
20
+ this.classPriorOf = new HashMap<>();
17
21
  }
18
22
 
19
23
  public void train(String label, Document document) {
@@ -37,12 +41,23 @@ public class NaiveBayesClassifier {
37
41
  updateIntegerCountBy(this.instanceCountOf, label, 1);
38
42
  }
39
43
 
40
- public Map<String, Double> classify(Document document) {
41
- Map<String, Double> classPriorOf = new HashMap<>();
42
- Map<String, Double> likelihoodOf = new HashMap<>();
43
- Map<String, Double> classPosteriorOf = new HashMap<>();
44
- Map<String, Long> frequencyMap = document.getFrequencyMap();
45
- double evidence = 0;
44
+ public void calculateInitialLikelihoods() {
45
+ // Update likelihood counts
46
+ for(String label : this.frequencyTable.getLabels()) {
47
+ // Set initial likelihood
48
+ initialLikelihoodOf.put(label, 1d);
49
+
50
+ // Calculate likelihoods
51
+ for (String word : this.wordTable.keySet()) {
52
+ double laplaceWordLikelihood =
53
+ (this.frequencyTable.get(label, word) + 1d) /
54
+ (this.instanceCountOf.get(label) + this.wordTable.size());
55
+
56
+ // Update likelihood
57
+ double likelihood = initialLikelihoodOf.get(label);
58
+ initialLikelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
59
+ }
60
+ }
46
61
 
47
62
  // Update the prior
48
63
  for(Map.Entry<String, Long> entry : this.instanceCountOf.entrySet()) {
@@ -50,36 +65,39 @@ public class NaiveBayesClassifier {
50
65
  double frequency = entry.getValue();
51
66
 
52
67
  // Update instance count
53
- classPriorOf.put(label, (frequency / this.totalCount));
68
+ this.classPriorOf.put(label, (frequency / this.totalCount));
54
69
  }
70
+ }
71
+
72
+ public Map<String, Double> classify(Document document) {
73
+ Map<String, Double> likelihoodOf = new HashMap<>();
74
+ Map<String, Double> classPosteriorOf = new HashMap<>();
75
+ Map<String, Long> featureFrequencyMap = document.getFrequencyMap();
76
+ double evidence = 0;
55
77
 
56
78
  // Update likelihood counts
57
79
  for(String label : this.frequencyTable.getLabels()) {
58
80
  // Set initial likelihood
59
- likelihoodOf.put(label, 1d);
81
+ likelihoodOf.put(label, this.initialLikelihoodOf.get(label));
60
82
 
61
- // Calculate likelihoods
62
- for(String word : wordTable.keySet()) {
83
+ // Calculate actual likelihoods
84
+ for(String word : featureFrequencyMap.keySet()) {
63
85
  double laplaceWordLikelihood =
64
86
  (this.frequencyTable.get(label, word) + 1d) /
65
87
  (this.instanceCountOf.get(label) + this.wordTable.size());
66
88
 
67
- // Update likelihood
89
+ // Update likelihood for words present in the features: swap the precomputed (1 - p) factor for p
68
90
  double likelihood = likelihoodOf.get(label);
69
- if(frequencyMap.containsKey(word)) {
70
- likelihoodOf.put(label, likelihood * laplaceWordLikelihood);
71
- } else {
72
- likelihoodOf.put(label, likelihood * (1d - laplaceWordLikelihood));
91
+ if(featureFrequencyMap.containsKey(word)) {
92
+ likelihoodOf.put(label, (likelihood * laplaceWordLikelihood) / (1d - laplaceWordLikelihood));
73
93
  }
74
94
  }
75
95
 
76
96
  // Default class posterior of label to 1.0
77
- if(!classPosteriorOf.containsKey(label)) {
78
- classPosteriorOf.put(label, 1d);
79
- }
97
+ classPosteriorOf.put(label, 1d);
80
98
 
81
99
  // Update class posterior
82
- double classPosterior = classPriorOf.get(label) * likelihoodOf.get(label);
100
+ double classPosterior = this.classPriorOf.get(label) * likelihoodOf.get(label);
83
101
  classPosteriorOf.put(label, classPosterior);
84
102
  evidence += classPosterior;
85
103
  }
data/target/bae.jar CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: java
6
6
  authors:
7
7
  - Garrett Thornburg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement