bae 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gemignore +1 -0
- data/bae.gemspec +3 -1
- data/build.xml +38 -13
- data/lib/bae/classifier.rb +4 -2
- data/lib/bae/version.rb +1 -1
- data/src/test/java/bae/NaiveBayesClassifierTest.java +9 -2
- data/target/bae.jar +0 -0
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7422728086a7f0fcebaaf67b5f30fff38646edd2ae4696f92bab4b9ab7ec9668
|
4
|
+
data.tar.gz: d089c5231adfc4932ca7e0e3a48af72b8437ff67b0c6285330ddbe453194dc90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ca7153d587cccd93c8b0c5fb501e881600cc59f2ff6ffa257b88fae8579fe5bc410b9146fa0114c56988ba5b1dc2e3fdfa88b89e00478c479bd80a1817f8708
|
7
|
+
data.tar.gz: 7ca71754f35caa12a3d905f63ae517256fd48a04d86def7cab979872777e1c611d146bd49c3c09ade7e38f98c9d84d4ea32e0e49f3bc84736814cdbe2cd37680
|
data/.gemignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
vendor/
|
data/bae.gemspec
CHANGED
@@ -13,7 +13,9 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.homepage = "https://github.com/film42/bae"
|
14
14
|
spec.license = "GPL version 3, or LGPL version 3 (Dual License)"
|
15
15
|
|
16
|
-
|
16
|
+
gem_files = `git ls-files -z`.split("\x0")
|
17
|
+
gem_ignored_files = `git ls-files -i -X .gemignore -z`.split("\x0")
|
18
|
+
spec.files = gem_files - gem_ignored_files
|
17
19
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
21
|
spec.require_paths = ["lib"]
|
data/build.xml
CHANGED
@@ -1,18 +1,43 @@
|
|
1
|
-
<project>
|
1
|
+
<project name="bae">
|
2
|
+
<path id="classpath.test">
|
3
|
+
<pathelement location="vendor/junit-4.12.jar"/>
|
4
|
+
<pathelement location="vendor/hamcrest-core-1.3.jar"/>
|
5
|
+
<pathelement location="out/classes/"/>
|
6
|
+
</path>
|
2
7
|
|
3
|
-
|
4
|
-
|
5
|
-
|
8
|
+
<target name="clean">
|
9
|
+
<delete dir="out/classes"/>
|
10
|
+
</target>
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
+
<target name="compile" depends="clean">
|
13
|
+
<mkdir dir="out"/>
|
14
|
+
<mkdir dir="out/classes"/>
|
15
|
+
<javac srcdir="src/main/java" destdir="out/classes" source="1.7" target="1.7" includeantruntime="false" debug="on" />
|
16
|
+
</target>
|
12
17
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
<target name="jar" depends="compile">
|
19
|
+
<mkdir dir="target"/>
|
20
|
+
<jar destfile="target/bae.jar" basedir="out/classes" excludes="**/*.jar,**/MANIFEST.MF,**/BCKEY.*" />
|
21
|
+
</target>
|
17
22
|
|
23
|
+
<target name="test" depends="compile">
|
24
|
+
<mkdir dir="out"/>
|
25
|
+
<mkdir dir="out/test/"/>
|
26
|
+
<mkdir dir="out/test/classes"/>
|
27
|
+
<javac srcdir="src/test/java" debug="on" destdir="out/test/classes" includeantruntime="false">
|
28
|
+
<classpath refid="classpath.test"/>
|
29
|
+
</javac>
|
30
|
+
|
31
|
+
<junit printsummary="on" haltonfailure="yes" fork="true">
|
32
|
+
<classpath>
|
33
|
+
<path refid="classpath.test"/>
|
34
|
+
<pathelement location="out/test/classes"/>
|
35
|
+
<pathelement location="out/classes"/>
|
36
|
+
</classpath>
|
37
|
+
<formatter type="brief" usefile="false" />
|
38
|
+
<batchtest>
|
39
|
+
<fileset dir="src/test/java" includes="**/*Test.java" />
|
40
|
+
</batchtest>
|
41
|
+
</junit>
|
42
|
+
</target>
|
18
43
|
</project>
|
data/lib/bae/classifier.rb
CHANGED
@@ -13,6 +13,8 @@ module Bae
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def finish_training!
|
16
|
+
@frequency_table_size = @frequency_table.keys.size
|
17
|
+
|
16
18
|
calculate_likelihoods!
|
17
19
|
calculate_priors!
|
18
20
|
end
|
@@ -68,7 +70,7 @@ module Bae
|
|
68
70
|
likelihoods = @likelihoods.dup
|
69
71
|
posterior = {}
|
70
72
|
|
71
|
-
vocab_size =
|
73
|
+
vocab_size = @frequency_table_size
|
72
74
|
|
73
75
|
label_index.each do |label, index|
|
74
76
|
words.map do |word|
|
@@ -130,7 +132,7 @@ module Bae
|
|
130
132
|
def calculate_likelihoods!
|
131
133
|
@likelihoods = label_index.inject({}) do |accumulator, (label, index)|
|
132
134
|
initial_likelihood = 1.0
|
133
|
-
vocab_size =
|
135
|
+
vocab_size = @frequency_table_size
|
134
136
|
|
135
137
|
frequency_table.each do |feature, row|
|
136
138
|
laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
|
data/lib/bae/version.rb
CHANGED
data/src/test/java/bae/NaiveBayesClassifierTest.java
CHANGED
@@ -18,6 +18,7 @@ public class NaiveBayesClassifierTest {
|
|
18
18
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
19
19
|
n.train("positive", new Document("aaa bbb bbb"));
|
20
20
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
21
|
+
n.calculateInitialLikelihoods();
|
21
22
|
|
22
23
|
assertEquals(1, n.getFrequencyTable().get("positive", "aaa"));
|
23
24
|
assertEquals(3, n.getFrequencyTable().get("positive", "bbb"));
|
@@ -33,6 +34,7 @@ public class NaiveBayesClassifierTest {
|
|
33
34
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
34
35
|
n.train("positive", new Document("aaa bbb bbb"));
|
35
36
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
37
|
+
n.calculateInitialLikelihoods();
|
36
38
|
|
37
39
|
assertEquals(1, (long)n.getWordTable().get("aaa"));
|
38
40
|
assertEquals(1, (long)n.getWordTable().get("bbb"));
|
@@ -49,6 +51,7 @@ public class NaiveBayesClassifierTest {
|
|
49
51
|
n.train("positive", new Document("aaa bbb bbb"));
|
50
52
|
n.train("negative", new Document("ccc ccc ccc ddd ddd ddd ddd"));
|
51
53
|
n.train("negative", new Document("ccc ccc ccc ddd ddd"));
|
54
|
+
n.calculateInitialLikelihoods();
|
52
55
|
|
53
56
|
assertEquals(2, (long)n.getInstanceCount().get("positive"));
|
54
57
|
assertEquals(3, (long)n.getInstanceCount().get("negative"));
|
@@ -64,6 +67,7 @@ public class NaiveBayesClassifierTest {
|
|
64
67
|
|
65
68
|
n.train("positive", d);
|
66
69
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
70
|
+
n.calculateInitialLikelihoods();
|
67
71
|
|
68
72
|
Map<String, Double> results = n.classify(new Document("aaa bbb"));
|
69
73
|
|
@@ -80,6 +84,7 @@ public class NaiveBayesClassifierTest {
|
|
80
84
|
|
81
85
|
n.train("positive", d);
|
82
86
|
n.train("negative", new Document("ccc ccc ddd ddd ddd"));
|
87
|
+
n.calculateInitialLikelihoods();
|
83
88
|
|
84
89
|
Map<String, Double> results = n.classify(new Document("ccc ccc ccc ddd ddd ddd"));
|
85
90
|
|
@@ -90,10 +95,10 @@ public class NaiveBayesClassifierTest {
|
|
90
95
|
@Test
|
91
96
|
public void canCorrectlyClassifyPositiveWithThreeLabels() {
|
92
97
|
NaiveBayesClassifier n = new NaiveBayesClassifier();
|
93
|
-
|
94
98
|
n.train("positive", new Document("aaa aaa bbb"));
|
95
99
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
96
100
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
101
|
+
n.calculateInitialLikelihoods();
|
97
102
|
|
98
103
|
Map<String, Double> results = n.classify(new Document("aaa bbb"));
|
99
104
|
|
@@ -109,6 +114,7 @@ public class NaiveBayesClassifierTest {
|
|
109
114
|
n.train("positive", new Document("aaa aaa bbb"));
|
110
115
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
111
116
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
117
|
+
n.calculateInitialLikelihoods();
|
112
118
|
|
113
119
|
Map<String, Double> results = n.classify(new Document("ccc ccc ccc ddd ddd"));
|
114
120
|
|
@@ -124,6 +130,7 @@ public class NaiveBayesClassifierTest {
|
|
124
130
|
n.train("positive", new Document("aaa aaa bbb"));
|
125
131
|
n.train("negative", new Document("ccc ccc ddd ddd"));
|
126
132
|
n.train("neutral", new Document("eee eee eee fff fff fff"));
|
133
|
+
n.calculateInitialLikelihoods();
|
127
134
|
|
128
135
|
Map<String, Double> results = n.classify(new Document("aaa ddd ddd eee eee eee fff"));
|
129
136
|
|
@@ -131,4 +138,4 @@ public class NaiveBayesClassifierTest {
|
|
131
138
|
assertEquals(0.09756097560975606, results.get("negative"), 0.00001);
|
132
139
|
assertEquals(0.7804878048780488, results.get("neutral"), 0.00001);
|
133
140
|
}
|
134
|
-
}
|
141
|
+
}
|
data/target/bae.jar
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,52 +1,52 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garrett Thornburg
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - "~>"
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.6'
|
19
|
-
name: bundler
|
20
|
-
prerelease: false
|
21
20
|
type: :development
|
21
|
+
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
28
29
|
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
|
-
name: rspec
|
34
|
-
prerelease: false
|
35
34
|
type: :development
|
35
|
+
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
42
43
|
requirement: !ruby/object:Gem::Requirement
|
43
44
|
requirements:
|
44
45
|
- - ">="
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '0'
|
47
|
-
name: rake
|
48
|
-
prerelease: false
|
49
48
|
type: :development
|
49
|
+
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
@@ -59,6 +59,7 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
+
- ".gemignore"
|
62
63
|
- ".gitignore"
|
63
64
|
- Gemfile
|
64
65
|
- LICENSE.txt
|
@@ -84,7 +85,7 @@ homepage: https://github.com/film42/bae
|
|
84
85
|
licenses:
|
85
86
|
- GPL version 3, or LGPL version 3 (Dual License)
|
86
87
|
metadata: {}
|
87
|
-
post_install_message:
|
88
|
+
post_install_message:
|
88
89
|
rdoc_options: []
|
89
90
|
require_paths:
|
90
91
|
- lib
|
@@ -99,9 +100,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
100
|
- !ruby/object:Gem::Version
|
100
101
|
version: '0'
|
101
102
|
requirements: []
|
102
|
-
rubyforge_project:
|
103
|
-
rubygems_version: 2.6
|
104
|
-
signing_key:
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.7.6
|
105
|
+
signing_key:
|
105
106
|
specification_version: 4
|
106
107
|
summary: Multinomial naive bayes classifier with a kick of java
|
107
108
|
test_files:
|