bae 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gemignore +1 -0
- data/bae.gemspec +3 -1
- data/build.xml +38 -13
- data/lib/bae/classifier.rb +4 -2
- data/lib/bae/version.rb +1 -1
- data/src/test/java/bae/NaiveBayesClassifierTest.java +9 -2
- data/target/bae.jar +0 -0
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7422728086a7f0fcebaaf67b5f30fff38646edd2ae4696f92bab4b9ab7ec9668
|
4
|
+
data.tar.gz: d089c5231adfc4932ca7e0e3a48af72b8437ff67b0c6285330ddbe453194dc90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ca7153d587cccd93c8b0c5fb501e881600cc59f2ff6ffa257b88fae8579fe5bc410b9146fa0114c56988ba5b1dc2e3fdfa88b89e00478c479bd80a1817f8708
|
7
|
+
data.tar.gz: 7ca71754f35caa12a3d905f63ae517256fd48a04d86def7cab979872777e1c611d146bd49c3c09ade7e38f98c9d84d4ea32e0e49f3bc84736814cdbe2cd37680
|
data/.gemignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
vendor/
|
data/bae.gemspec
CHANGED
@@ -13,7 +13,9 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.homepage = "https://github.com/film42/bae"
|
14
14
|
spec.license = "GPL version 3, or LGPL version 3 (Dual License)"
|
15
15
|
|
16
|
-
|
16
|
+
gem_files = `git ls-files -z`.split("\x0")
|
17
|
+
gem_ignored_files = `git ls-files -i -X .gemignore -z`.split("\x0")
|
18
|
+
spec.files = gem_files - gem_ignored_files
|
17
19
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
21
|
spec.require_paths = ["lib"]
|
data/build.xml
CHANGED
@@ -1,18 +1,43 @@
|
|
1
|
-
<project>
|
1
|
+
<project name="bae">
|
2
|
+
<path id="classpath.test">
|
3
|
+
<pathelement location="vendor/junit-4.12.jar"/>
|
4
|
+
<pathelement location="vendor/hamcrest-core-1.3.jar"/>
|
5
|
+
<pathelement location="out/classes/"/>
|
6
|
+
</path>
|
2
7
|
|
3
|
-
|
4
|
-
|
5
|
-
|
8
|
+
<target name="clean">
|
9
|
+
<delete dir="out/classes"/>
|
10
|
+
</target>
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
+
<target name="compile" depends="clean">
|
13
|
+
<mkdir dir="out"/>
|
14
|
+
<mkdir dir="out/classes"/>
|
15
|
+
<javac srcdir="src/main/java" destdir="out/classes" source="1.7" target="1.7" includeantruntime="false" debug="on" />
|
16
|
+
</target>
|
12
17
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
<target name="jar" depends="compile">
|
19
|
+
<mkdir dir="target"/>
|
20
|
+
<jar destfile="target/bae.jar" basedir="out/classes" excludes="**/*.jar,**/MANIFEST.MF,**/BCKEY.*" />
|
21
|
+
</target>
|
17
22
|
|
23
|
+
<target name="test" depends="compile">
|
24
|
+
<mkdir dir="out"/>
|
25
|
+
<mkdir dir="out/test/"/>
|
26
|
+
<mkdir dir="out/test/classes"/>
|
27
|
+
<javac srcdir="src/test/java" debug="on" destdir="out/test/classes" includeantruntime="false">
|
28
|
+
<classpath refid="classpath.test"/>
|
29
|
+
</javac>
|
30
|
+
|
31
|
+
<junit printsummary="on" haltonfailure="yes" fork="true">
|
32
|
+
<classpath>
|
33
|
+
<path refid="classpath.test"/>
|
34
|
+
<pathelement location="out/test/classes"/>
|
35
|
+
<pathelement location="out/classes"/>
|
36
|
+
</classpath>
|
37
|
+
<formatter type="brief" usefile="false" />
|
38
|
+
<batchtest>
|
39
|
+
<fileset dir="src/test/java" includes="**/*Test.java" />
|
40
|
+
</batchtest>
|
41
|
+
</junit>
|
42
|
+
</target>
|
18
43
|
</project>
|
data/lib/bae/classifier.rb
CHANGED
@@ -13,6 +13,8 @@ module Bae
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def finish_training!
|
16
|
+
@frequency_table_size = @frequency_table.keys.size
|
17
|
+
|
16
18
|
calculate_likelihoods!
|
17
19
|
calculate_priors!
|
18
20
|
end
|
@@ -68,7 +70,7 @@ module Bae
|
|
68
70
|
likelihoods = @likelihoods.dup
|
69
71
|
posterior = {}
|
70
72
|
|
71
|
-
vocab_size =
|
73
|
+
vocab_size = @frequency_table_size
|
72
74
|
|
73
75
|
label_index.each do |label, index|
|
74
76
|
words.map do |word|
|
@@ -130,7 +132,7 @@ module Bae
|
|
130
132
|
def calculate_likelihoods!
|
131
133
|
@likelihoods = label_index.inject({}) do |accumulator, (label, index)|
|
132
134
|
initial_likelihood = 1.0
|
133
|
-
vocab_size =
|
135
|
+
vocab_size = @frequency_table_size
|
134
136
|
|
135
137
|
frequency_table.each do |feature, row|
|
136
138
|
laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
|
data/lib/bae/version.rb
CHANGED
data/src/test/java/bae/NaiveBayesClassifierTest.java
CHANGED
@@ -18,6 +18,7 @@ public class NaiveBayesClassifierTest {
|
|
18
18
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
19
19
|
n.train("positive", new Document("aaa bbb bbb"));
|
20
20
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
21
|
+
n.calculateInitialLikelihoods();
|
21
22
|
|
22
23
|
assertEquals(1, n.getFrequencyTable().get("positive", "aaa"));
|
23
24
|
assertEquals(3, n.getFrequencyTable().get("positive", "bbb"));
|
@@ -33,6 +34,7 @@ public class NaiveBayesClassifierTest {
|
|
33
34
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
34
35
|
n.train("positive", new Document("aaa bbb bbb"));
|
35
36
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
37
|
+
n.calculateInitialLikelihoods();
|
36
38
|
|
37
39
|
assertEquals(1, (long)n.getWordTable().get("aaa"));
|
38
40
|
assertEquals(1, (long)n.getWordTable().get("bbb"));
|
@@ -49,6 +51,7 @@ public class NaiveBayesClassifierTest {
|
|
49
51
|
n.train("positive", new Document("aaa bbb bbb"));
|
50
52
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
51
53
|
n.train("negative", new Document("ccc ccc ccc ddd ddd"));
|
54
|
+
n.calculateInitialLikelihoods();
|
52
55
|
|
53
56
|
assertEquals(2, (long)n.getInstanceCount().get("positive"));
|
54
57
|
assertEquals(3, (long)n.getInstanceCount().get("negative"));
|
@@ -64,6 +67,7 @@ public class NaiveBayesClassifierTest {
|
|
64
67
|
|
65
68
|
n.train("positive", d);
|
66
69
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
70
|
+
n.calculateInitialLikelihoods();
|
67
71
|
|
68
72
|
Map<String, Double> results = n.classify(new Document("aaa bbb"));
|
69
73
|
|
@@ -80,6 +84,7 @@ public class NaiveBayesClassifierTest {
|
|
80
84
|
|
81
85
|
n.train("positive", d);
|
82
86
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
87
|
+
n.calculateInitialLikelihoods();
|
83
88
|
|
84
89
|
Map<String, Double> results = n.classify(new Document("ccc ccc ccc ddd ddd ddd"));
|
85
90
|
|
@@ -90,10 +95,10 @@ public class NaiveBayesClassifierTest {
|
|
90
95
|
@Test
|
91
96
|
public void canCorrectlyClassifyPositiveWithThreeLabels() {
|
92
97
|
NaiveBayesClassifier n = new NaiveBayesClassifier();
|
93
|
-
|
94
98
|
n.train("positive", new Document("aaa aaa bbb"));
|
95
99
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
96
100
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
101
|
+
n.calculateInitialLikelihoods();
|
97
102
|
|
98
103
|
Map<String, Double> results = n.classify(new Document("aaa bbb"));
|
99
104
|
|
@@ -109,6 +114,7 @@ public class NaiveBayesClassifierTest {
|
|
109
114
|
n.train("positive", new Document("aaa aaa bbb"));
|
110
115
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
111
116
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
117
|
+
n.calculateInitialLikelihoods();
|
112
118
|
|
113
119
|
Map<String, Double> results = n.classify(new Document("ccc ccc ccc ddd ddd"));
|
114
120
|
|
@@ -124,6 +130,7 @@ public class NaiveBayesClassifierTest {
|
|
124
130
|
n.train("positive", new Document("aaa aaa bbb"));
|
125
131
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
126
132
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
133
|
+
n.calculateInitialLikelihoods();
|
127
134
|
|
128
135
|
Map<String, Double> results = n.classify(new Document("aaa ddd ddd eee eee eee fff"));
|
129
136
|
|
@@ -131,4 +138,4 @@ public class NaiveBayesClassifierTest {
|
|
131
138
|
assertEquals(0.09756097560975606, results.get("negative"), 0.00001);
|
132
139
|
assertEquals(0.7804878048780488, results.get("neutral"), 0.00001);
|
133
140
|
}
|
134
|
-
}
|
141
|
+
}
|
data/target/bae.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,52 +1,52 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garrett Thornburg
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - "~>"
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.6'
|
19
|
-
name: bundler
|
20
|
-
prerelease: false
|
21
20
|
type: :development
|
21
|
+
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
28
29
|
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
|
-
name: rspec
|
34
|
-
prerelease: false
|
35
34
|
type: :development
|
35
|
+
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
42
43
|
requirement: !ruby/object:Gem::Requirement
|
43
44
|
requirements:
|
44
45
|
- - ">="
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '0'
|
47
|
-
name: rake
|
48
|
-
prerelease: false
|
49
48
|
type: :development
|
49
|
+
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
@@ -59,6 +59,7 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
+
- ".gemignore"
|
62
63
|
- ".gitignore"
|
63
64
|
- Gemfile
|
64
65
|
- LICENSE.txt
|
@@ -84,7 +85,7 @@ homepage: https://github.com/film42/bae
|
|
84
85
|
licenses:
|
85
86
|
- GPL version 3, or LGPL version 3 (Dual License)
|
86
87
|
metadata: {}
|
87
|
-
post_install_message:
|
88
|
+
post_install_message:
|
88
89
|
rdoc_options: []
|
89
90
|
require_paths:
|
90
91
|
- lib
|
@@ -99,9 +100,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
100
|
- !ruby/object:Gem::Version
|
100
101
|
version: '0'
|
101
102
|
requirements: []
|
102
|
-
rubyforge_project:
|
103
|
-
rubygems_version: 2.6
|
104
|
-
signing_key:
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.7.6
|
105
|
+
signing_key:
|
105
106
|
specification_version: 4
|
106
107
|
summary: Multinomial naive bayes classifier with a kick of java
|
107
108
|
test_files:
|