te_rex 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/format/{corpus/basic_file.rb → basic_file.rb} +2 -2
- data/lib/format/error_file.rb +29 -0
- data/lib/te_rex/corpus.rb +64 -15
- data/lib/te_rex/version.rb +1 -1
- data/lib/te_rex.rb +3 -2
- data/test/bayes_data_test.rb +1 -1
- data/test/corpus_test.rb +57 -0
- data/test/sparse_bayes_test.rb +34 -29
- data/test/trained_bayes_test.rb +34 -29
- metadata +8 -21
- data/test/test_helper.rb +0 -9
- /data/lib/format/{corpus/brown_file.rb → brown_file.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 929e90be95279fcf5093f8d865457509bcc5e692
|
4
|
+
data.tar.gz: 98e1e7ad046d7e3f423934187b0df0546a6500a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad648071a76d79757ecdbc793a31753bdfdf94f6c0e8b462b9f315c8d514e9fdc07c942d2adae49dc20c7d35911554ff48c7f41c6804cf62c1727c16542d897a
|
7
|
+
data.tar.gz: a6397c1ea3b21735813c8f2cd739b889720d08746a5ea1903db1976890ea2c9eaac2e316ea886b2d6305c1e8ef3e02053c560b62dacaa3cfc5f0390094989893
|
@@ -2,7 +2,7 @@ module TeRex
|
|
2
2
|
module Format
|
3
3
|
class BasicFile
|
4
4
|
|
5
|
-
|
5
|
+
attr_reader :sentences, :path
|
6
6
|
|
7
7
|
def initialize(file_path)
|
8
8
|
@path = file_path
|
@@ -22,8 +22,8 @@ module TeRex
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
end
|
25
|
-
|
26
25
|
end
|
26
|
+
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Format
|
3
|
+
require 'csv'
|
4
|
+
class ErrorFile
|
5
|
+
|
6
|
+
attr_reader :sentences, :path
|
7
|
+
|
8
|
+
@@csv_conf = {:headers => true}
|
9
|
+
|
10
|
+
def initialize(file_path)
|
11
|
+
@path = file_path
|
12
|
+
end
|
13
|
+
|
14
|
+
# Each row of csv as Array object, strip it and return
|
15
|
+
def scanner
|
16
|
+
accumulator = []
|
17
|
+
CSV.foreach(@path, @@csv_conf) do |row|
|
18
|
+
next if row.empty?
|
19
|
+
stripped_line = row[0].strip
|
20
|
+
unless stripped_line.nil? || stripped_line.empty?
|
21
|
+
accumulator << stripped_line
|
22
|
+
end
|
23
|
+
end
|
24
|
+
@sentences ||= accumulator
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/te_rex/corpus.rb
CHANGED
@@ -2,39 +2,88 @@ module TeRex
|
|
2
2
|
module Corpus
|
3
3
|
class Body
|
4
4
|
|
5
|
-
|
5
|
+
attr_reader :set, :sample_size, :training, :testing, :format_klass, :category_klass, :total_sentences
|
6
6
|
|
7
|
-
def initialize(glob,
|
7
|
+
def initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass)
|
8
8
|
@glob = glob
|
9
|
-
@
|
9
|
+
@format_klass = format_klass
|
10
|
+
@category_klass = category_klass
|
11
|
+
@partition = partition
|
10
12
|
end
|
11
13
|
|
14
|
+
#@sample_size = (@set.count * 0.75).round
|
12
15
|
def build
|
13
|
-
|
14
|
-
@
|
15
|
-
|
16
|
+
define_set
|
17
|
+
case @partition
|
18
|
+
when /file/
|
19
|
+
file_partition
|
20
|
+
else
|
21
|
+
sentence_partition
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def file_partition
|
26
|
+
@sample_size = (@set.count.to_f * 0.75).round
|
27
|
+
@training = partition_training_by_file
|
28
|
+
@testing = partition_test_by_file
|
29
|
+
count_all
|
30
|
+
end
|
31
|
+
|
32
|
+
def sentence_partition
|
33
|
+
#super_set = build_superset
|
34
|
+
corpus_set = partition_files_for_sentences
|
35
|
+
#@sample_size = (superset.count.to_f * 0.75).round
|
36
|
+
@sample_size = 0.0
|
37
|
+
@training = partition_training_by_sentence(corpus_set)
|
38
|
+
@testing = partition_test_by_sentence(corpus_set)
|
39
|
+
count_all
|
16
40
|
end
|
17
41
|
|
18
|
-
def
|
19
|
-
@
|
20
|
-
@
|
42
|
+
def define_set
|
43
|
+
@set ||= Dir[@glob].map do |file|
|
44
|
+
@format_klass.new(file)
|
21
45
|
end
|
22
|
-
@
|
23
|
-
|
46
|
+
@set
|
47
|
+
end
|
48
|
+
|
49
|
+
def partition_training_by_file
|
50
|
+
@set[0..@sample_size].map do |file|
|
51
|
+
file.scanner
|
52
|
+
end.flatten
|
24
53
|
end
|
25
54
|
|
26
|
-
def
|
27
|
-
@
|
55
|
+
def partition_test_by_file
|
56
|
+
@set[(@sample_size - 1)..-1].map do |file|
|
28
57
|
file.scanner
|
29
58
|
end.flatten
|
30
59
|
end
|
31
60
|
|
32
|
-
def
|
33
|
-
@
|
61
|
+
def partition_files_for_sentences
|
62
|
+
@set.map do |file|
|
34
63
|
file.scanner
|
35
64
|
end.flatten
|
36
65
|
end
|
37
66
|
|
67
|
+
def partition_training_by_sentence(c_set)
|
68
|
+
c_set.sample(c_set.count * 0.75)
|
69
|
+
end
|
70
|
+
|
71
|
+
def partition_test_by_sentence(c_set)
|
72
|
+
c_set.sample(c_set.count * 0.25)
|
73
|
+
end
|
74
|
+
|
75
|
+
def build_superset
|
76
|
+
@set.reduce([]) do |memo,formatter|
|
77
|
+
memo << formatter.sentences
|
78
|
+
end.flatten
|
79
|
+
end
|
80
|
+
|
81
|
+
def count_all
|
82
|
+
counter = 0
|
83
|
+
@set.map{|f| counter += f.sentences.count}
|
84
|
+
@total_sentences = counter
|
85
|
+
end
|
86
|
+
|
38
87
|
end
|
39
88
|
end
|
40
89
|
end
|
data/lib/te_rex/version.rb
CHANGED
data/lib/te_rex.rb
CHANGED
@@ -6,8 +6,9 @@
|
|
6
6
|
#end
|
7
7
|
|
8
8
|
require_relative "format/format"
|
9
|
-
require_relative "format/
|
10
|
-
require_relative "format/
|
9
|
+
require_relative "format/brown_file"
|
10
|
+
require_relative "format/basic_file"
|
11
|
+
require_relative "format/error_file"
|
11
12
|
require_relative "te_rex/stop_word"
|
12
13
|
require_relative "te_rex/alpha_num"
|
13
14
|
require_relative "te_rex/bayes_data"
|
data/test/bayes_data_test.rb
CHANGED
@@ -58,7 +58,7 @@ class BayesDataTest < MicroTest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
test "index frequency has correct counts" do
|
61
|
-
s =
|
61
|
+
s = 'Here is a sentence $141.34 that that $60 that 123.56 I need & & ^ % $c#@ to check the index is correct and okay.'
|
62
62
|
result = TeRex::Classifier::BayesData.index_frequency(s)
|
63
63
|
|
64
64
|
assert result[:moneyterm] == 3
|
data/test/corpus_test.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
#require_relative "../lib/te_rex"
|
2
|
+
class CorpusTest < MicroTest::Test
|
3
|
+
class MockErrorClassifier
|
4
|
+
end
|
5
|
+
|
6
|
+
@@error_corpus = TeRex::Corpus::Body.new(glob: "test/test_modules/*.csv", format_klass: TeRex::Format::ErrorFile, category_klass: MockErrorClassifier)
|
7
|
+
@@error_corpus.build
|
8
|
+
|
9
|
+
test "Corpus has correct data before building" do
|
10
|
+
assert @@error_corpus.format_klass.name == "TeRex::Format::ErrorFile"
|
11
|
+
assert @@error_corpus.category_klass.name == "CorpusTest::MockErrorClassifier"
|
12
|
+
end
|
13
|
+
|
14
|
+
test "total count of sentences is correct" do
|
15
|
+
assert @@error_corpus.total_sentences == 12
|
16
|
+
end
|
17
|
+
|
18
|
+
test "ratio of training to testing is within 70%" do
|
19
|
+
ratio = @@error_corpus.testing.count.to_f / @@error_corpus.training.count.to_f
|
20
|
+
assert (60...80).map{|i| i}.include?((ratio * 100).to_i)
|
21
|
+
end
|
22
|
+
|
23
|
+
test "sentence counts are correct" do
|
24
|
+
assert @@error_corpus.set.count == 3
|
25
|
+
assert @@error_corpus.training.count == 12
|
26
|
+
assert @@error_corpus.testing.count == 8
|
27
|
+
end
|
28
|
+
|
29
|
+
@@sent_corpus = TeRex::Corpus::Body.new(glob: "test/test_modules/*.csv", partition: :sentence, format_klass: TeRex::Format::ErrorFile, category_klass: MockErrorClassifier)
|
30
|
+
@@sent_corpus.build
|
31
|
+
|
32
|
+
test "Corpus has correct data before building" do
|
33
|
+
assert @@sent_corpus.format_klass.name == "TeRex::Format::ErrorFile"
|
34
|
+
assert @@sent_corpus.category_klass.name == "CorpusTest::MockErrorClassifier"
|
35
|
+
end
|
36
|
+
|
37
|
+
test "total count of sentences is correct" do
|
38
|
+
assert @@sent_corpus.total_sentences == 12
|
39
|
+
end
|
40
|
+
|
41
|
+
test "ratio of training to total is about 75%" do
|
42
|
+
ratio = @@sent_corpus.training.count.to_f / @@sent_corpus.total_sentences
|
43
|
+
assert (72...77).map{|i| i}.include?((ratio * 100).to_i)
|
44
|
+
end
|
45
|
+
|
46
|
+
test "ratio of training to total is about 25%" do
|
47
|
+
ratio = @@sent_corpus.testing.count.to_f / @@sent_corpus.total_sentences
|
48
|
+
assert (22...27).map{|i| i}.include?((ratio * 100).to_i)
|
49
|
+
end
|
50
|
+
|
51
|
+
test "sentence counts are correct" do
|
52
|
+
assert @@sent_corpus.set.count == 3
|
53
|
+
assert @@sent_corpus.training.count == 9
|
54
|
+
assert @@sent_corpus.testing.count == 3
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
data/test/sparse_bayes_test.rb
CHANGED
@@ -22,7 +22,12 @@ class SparseBayesTest < MicroTest::Test
|
|
22
22
|
"The cancellation policy will be determined when the rate is validated."
|
23
23
|
]
|
24
24
|
|
25
|
-
@@cls = TeRex::Classifier::Bayes.new(
|
25
|
+
@@cls = TeRex::Classifier::Bayes.new(
|
26
|
+
{:tag => "Refund", :msg => "We are pleased to offer you a refund"},
|
27
|
+
{:tag => "Partrefund", :msg => "You may receive a partial refund"},
|
28
|
+
{:tag => "Nonrefund", :msg => "Much apologies, no refund to you"},
|
29
|
+
{:tag => "Unknown", :msg => "Waht?"}
|
30
|
+
)
|
26
31
|
@@refund.each {|txt| @@cls.train("Refund", txt) }
|
27
32
|
@@partrefund.each {|txt| @@cls.train("Partrefund", txt) }
|
28
33
|
@@norefund.each {|txt| @@cls.train("Nonrefund", txt) }
|
@@ -40,15 +45,15 @@ class SparseBayesTest < MicroTest::Test
|
|
40
45
|
s_non1 = @@cls.classify(s_non)
|
41
46
|
s_unk1= @@cls.classify(s_unk)
|
42
47
|
|
43
|
-
assert s_refund1 == "Refund"
|
44
|
-
assert s_partial1 == "Partrefund"
|
45
|
-
assert s_non1 == "Nonrefund"
|
46
|
-
assert s_unk1 == "Unknown"
|
48
|
+
assert s_refund1 == ["Refund", "We are pleased to offer you a refund"]
|
49
|
+
assert s_partial1 == ["Partrefund", "You may receive a partial refund"]
|
50
|
+
assert s_non1 == ["Nonrefund", "Much apologies, no refund to you"]
|
51
|
+
assert s_unk1 == ["Unknown", "Waht?"]
|
47
52
|
|
48
|
-
assert s_refund1 != "Partrefund"
|
49
|
-
assert s_partial1 != "Refund"
|
50
|
-
assert s_non1 != "Unknown"
|
51
|
-
assert s_unk1 != "Nonrefund"
|
53
|
+
assert s_refund1 != ["Partrefund", "You may receive a partial refund"]
|
54
|
+
assert s_partial1 != ["Refund", "We are pleased to offer you a refund"]
|
55
|
+
assert s_non1 != ["Unknown", "Waht?"]
|
56
|
+
assert s_unk1 != ["Nonrefund", "Much apologies, no refund to you"]
|
52
57
|
end
|
53
58
|
|
54
59
|
|
@@ -64,10 +69,10 @@ class SparseBayesTest < MicroTest::Test
|
|
64
69
|
s33 = @@cls.classify(s3)
|
65
70
|
s44 = @@cls.classify(s4)
|
66
71
|
|
67
|
-
assert s11 == "Unknown"
|
68
|
-
assert s22 == "Unknown"
|
69
|
-
assert s33 == "Unknown"
|
70
|
-
assert s44 == "Unknown"
|
72
|
+
assert s11 == ["Unknown", "Waht?"]
|
73
|
+
assert s22 == ["Unknown", "Waht?"]
|
74
|
+
assert s33 == ["Unknown", "Waht?"]
|
75
|
+
assert s44 == ["Unknown", "Waht?"]
|
71
76
|
end
|
72
77
|
|
73
78
|
test "Sparse Data Set Test: Micro examples should return correct classification" do
|
@@ -82,15 +87,15 @@ class SparseBayesTest < MicroTest::Test
|
|
82
87
|
s33 = @@cls.classify(s3)
|
83
88
|
s44 = @@cls.classify(s4)
|
84
89
|
|
85
|
-
assert s11 == "Refund"
|
86
|
-
assert s22 == "Partrefund"
|
87
|
-
assert s33 == "Nonrefund"
|
88
|
-
assert s44 == "Unknown"
|
90
|
+
assert s11 == ["Refund", "We are pleased to offer you a refund"]
|
91
|
+
assert s22 == ["Partrefund","You may receive a partial refund"]
|
92
|
+
assert s33 == ["Nonrefund", "Much apologies, no refund to you"]
|
93
|
+
assert s44 == ["Unknown", "Waht?"]
|
89
94
|
|
90
|
-
assert s11 != "Partrefund"
|
91
|
-
assert s22 != "Refund"
|
92
|
-
assert s33 != "Unknown"
|
93
|
-
assert s44 != "Nonrefund"
|
95
|
+
assert s11 != ["Partrefund", "You may receive a partial refund"]
|
96
|
+
assert s22 != ["Refund", "We are pleased to offer you a refund"]
|
97
|
+
assert s33 != ["Unknown", "Waht?"]
|
98
|
+
assert s44 != ["Nonrefund", "Much apologies, no refund to you"]
|
94
99
|
end
|
95
100
|
|
96
101
|
|
@@ -106,18 +111,18 @@ test "Sparse Data Set Test: Micro examples should NOT match fake classes" do
|
|
106
111
|
s33 = @@cls.classify(s3)
|
107
112
|
s44 = @@cls.classify(s4)
|
108
113
|
|
109
|
-
assert s11 != "Computers"
|
110
|
-
assert s22 != "Science"
|
111
|
-
assert s33 != "Entertainment"
|
112
|
-
assert s44 != "Sports"
|
114
|
+
assert s11 != ["Computers", "computers yay!"]
|
115
|
+
assert s22 != ["Science", "science yay!"]
|
116
|
+
assert s33 != ["Entertainment", "entertainment yay!"]
|
117
|
+
assert s44 != ["Sports", "sports yay!"]
|
113
118
|
end
|
114
119
|
|
115
120
|
test "Sparse Data Set Test: Category counts are equivalent with number of training data per class" do
|
116
121
|
|
117
|
-
assert @@cls.category_counts[:Refund] == @@refund.count
|
118
|
-
assert @@cls.category_counts[:Partrefund] == @@partrefund.count
|
119
|
-
assert @@cls.category_counts[:Nonrefund] == @@norefund.count
|
120
|
-
assert @@cls.category_counts[:Unknown] == @@unknown.count
|
122
|
+
assert @@cls.category_counts[:Refund] == @@refund.count
|
123
|
+
assert @@cls.category_counts[:Partrefund] == @@partrefund.count
|
124
|
+
assert @@cls.category_counts[:Nonrefund] == @@norefund.count
|
125
|
+
assert @@cls.category_counts[:Unknown] == @@unknown.count
|
121
126
|
|
122
127
|
end
|
123
128
|
|
data/test/trained_bayes_test.rb
CHANGED
@@ -8,7 +8,12 @@ class TrainedBayesTest < MicroTest::Test
|
|
8
8
|
@@norefund = TeRex::Train::NONREFUND
|
9
9
|
@@unknown = TeRex::Train::UNKNOWN
|
10
10
|
|
11
|
-
@@cls = TeRex::Classifier::Bayes.new(
|
11
|
+
@@cls = TeRex::Classifier::Bayes.new(
|
12
|
+
{:tag => "Refund", :msg => "We are pleased to offer you a refund"},
|
13
|
+
{:tag => "Partrefund", :msg => "You may receive a partial refund"},
|
14
|
+
{:tag => "Nonrefund", :msg => "Much apologies, no refund to you"},
|
15
|
+
{:tag => "Unknown", :msg => "Waht?"}
|
16
|
+
)
|
12
17
|
@@refund.each {|txt| @@cls.train("Refund", txt) }
|
13
18
|
@@partrefund.each {|txt| @@cls.train("Partrefund", txt) }
|
14
19
|
@@norefund.each {|txt| @@cls.train("Nonrefund", txt) }
|
@@ -27,18 +32,18 @@ class TrainedBayesTest < MicroTest::Test
|
|
27
32
|
s_unk1= @@cls.classify(s_unk)
|
28
33
|
|
29
34
|
# We are lenient on Partrefund || Refund but we still want to see when it fails
|
30
|
-
assert s_refund1 == "Refund" || "Partrefund"
|
35
|
+
assert s_refund1 == ["Refund", "We are pleased to offer you a refund"] || ["Partrefund", "You may receive a partial refund"]
|
31
36
|
# We are lenient on Refund || Partrefund because of the non-distinctness of the two.
|
32
|
-
assert s_partial1 == "Partrefund" || "Refund"
|
33
|
-
assert s_non1 == "Nonrefund"
|
34
|
-
assert s_unk1 == "Unknown"
|
37
|
+
assert s_partial1 == ["Partrefund", "You may receive a partial refund"] || ["Refund", "We are pleased to offer you a refund"]
|
38
|
+
assert s_non1 == ["Nonrefund", "Much apologies, no refund to you"]
|
39
|
+
assert s_unk1 == ["Unknown", "Waht?"]
|
35
40
|
|
36
41
|
# We are lenient on Partrefund || Refund but we still want to see when it fails
|
37
|
-
assert s_refund1 != "Partrefund"
|
42
|
+
assert s_refund1 != ["Partrefund", "You may receive a partial refund"]
|
38
43
|
# We are lenient on Refund || Partrefund but we still want to see when it fails
|
39
|
-
assert s_partial1 != "Refund"
|
40
|
-
assert s_non1 != "Unknown"
|
41
|
-
assert s_unk1 != "Nonrefund"
|
44
|
+
assert s_partial1 != ["Refund", "We are pleased to offer you a refund"]
|
45
|
+
assert s_non1 != ["Unknown", "Waht?"]
|
46
|
+
assert s_unk1 != ["Nonrefund", "Much apologies, no refund to you"]
|
42
47
|
end
|
43
48
|
|
44
49
|
|
@@ -54,10 +59,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
54
59
|
norefund_s11 = @@cls.classify(norefund_s1)
|
55
60
|
unk_s11 = @@cls.classify(unk_s1)
|
56
61
|
|
57
|
-
assert refund_s11 == "Refund"
|
58
|
-
assert partrefund_s11 == "Partrefund"
|
59
|
-
assert norefund_s11 == "Nonrefund"
|
60
|
-
assert unk_s11 == "Unknown"
|
62
|
+
assert refund_s11 == ["Refund", "We are pleased to offer you a refund"]
|
63
|
+
assert partrefund_s11 == ["Partrefund", "You may receive a partial refund"]
|
64
|
+
assert norefund_s11 == ["Nonrefund", "Much apologies, no refund to you"]
|
65
|
+
assert unk_s11 == ["Unknown", "Waht?"]
|
61
66
|
end
|
62
67
|
|
63
68
|
test "Training Data Set Test: Micro examples should return correct classification" do
|
@@ -72,15 +77,15 @@ class TrainedBayesTest < MicroTest::Test
|
|
72
77
|
s33 = @@cls.classify(s3)
|
73
78
|
s44 = @@cls.classify(s4)
|
74
79
|
|
75
|
-
assert s11 == "Refund"
|
76
|
-
assert s22 == "Partrefund"
|
77
|
-
assert s33 == "Nonrefund"
|
78
|
-
assert s44 == "Unknown"
|
80
|
+
assert s11 == ["Refund", "We are pleased to offer you a refund"]
|
81
|
+
assert s22 == ["Partrefund", "You may receive a partial refund"]
|
82
|
+
assert s33 == ["Nonrefund", "Much apologies, no refund to you"]
|
83
|
+
assert s44 == ["Unknown", "Waht?"]
|
79
84
|
|
80
|
-
assert s11 != "Partrefund"
|
81
|
-
assert s22 != "
|
82
|
-
assert s33 != "Unknown"
|
83
|
-
assert s44 != "
|
85
|
+
assert s11 != ["Partrefund", "You may receive a partial refund"]
|
86
|
+
assert s22 != ["Nonrefund", "Much apologies, no refund to you"]
|
87
|
+
assert s33 != ["Unknown", "Waht?"]
|
88
|
+
assert s44 != ["Refund", "We are pleased to offer you a refund"]
|
84
89
|
end
|
85
90
|
|
86
91
|
test "Training Data Set Test: Micro examples should NOT match fake classes" do
|
@@ -95,10 +100,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
95
100
|
s33 = @@cls.classify(s3)
|
96
101
|
s44 = @@cls.classify(s4)
|
97
102
|
|
98
|
-
assert s11 != "Computers"
|
99
|
-
assert s22 != "Science"
|
100
|
-
assert s33 != "Entertainment"
|
101
|
-
assert s44 != "Sports"
|
103
|
+
assert s11 != ["Computers", "computers yay!"]
|
104
|
+
assert s22 != ["Science", "science yay!"]
|
105
|
+
assert s33 != ["Entertainment", "entertainment yay!"]
|
106
|
+
assert s44 != ["Sports", "sports yay!"]
|
102
107
|
end
|
103
108
|
|
104
109
|
test "Training Data Set Test: Ambiguous examples should return 'Unknown'" do
|
@@ -113,10 +118,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
113
118
|
s33 = @@cls.classify(s3)
|
114
119
|
s44 = @@cls.classify(s4)
|
115
120
|
|
116
|
-
assert s11 == "Unknown"
|
117
|
-
assert s22 == "Unknown"
|
118
|
-
assert s33 == "Unknown"
|
119
|
-
assert s44 == "Unknown"
|
121
|
+
assert s11 == ["Unknown", "Waht?"]
|
122
|
+
assert s22 == ["Unknown", "Waht?"]
|
123
|
+
assert s33 == ["Unknown", "Waht?"]
|
124
|
+
assert s44 == ["Unknown", "Waht?"]
|
120
125
|
end
|
121
126
|
|
122
127
|
test "Training Data Set Test: Category counts are equivalent with number of training data per class" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: te_rex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joshua Bowles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast-stemmer
|
@@ -150,20 +150,6 @@ dependencies:
|
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.4.9.1
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: simplecov
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
153
|
description: Simple text processing for small data sets.
|
168
154
|
email:
|
169
155
|
- jbowayles@gmail.com
|
@@ -171,8 +157,9 @@ executables: []
|
|
171
157
|
extensions: []
|
172
158
|
extra_rdoc_files: []
|
173
159
|
files:
|
174
|
-
- lib/format/
|
175
|
-
- lib/format/
|
160
|
+
- lib/format/basic_file.rb
|
161
|
+
- lib/format/brown_file.rb
|
162
|
+
- lib/format/error_file.rb
|
176
163
|
- lib/format/format.rb
|
177
164
|
- lib/te_rex.rb
|
178
165
|
- lib/te_rex/alpha_num.rb
|
@@ -183,8 +170,8 @@ files:
|
|
183
170
|
- lib/te_rex/version.rb
|
184
171
|
- test/alpha_num_test.rb
|
185
172
|
- test/bayes_data_test.rb
|
173
|
+
- test/corpus_test.rb
|
186
174
|
- test/sparse_bayes_test.rb
|
187
|
-
- test/test_helper.rb
|
188
175
|
- test/test_modules/nonrefund.rb
|
189
176
|
- test/test_modules/partrefund.rb
|
190
177
|
- test/test_modules/refund.rb
|
@@ -210,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
210
197
|
version: '0'
|
211
198
|
requirements: []
|
212
199
|
rubyforge_project:
|
213
|
-
rubygems_version: 2.
|
200
|
+
rubygems_version: 2.4.3
|
214
201
|
signing_key:
|
215
202
|
specification_version: 4
|
216
203
|
summary: Basic NLP stuff for small data sets. Naive Bayes classification and corpora
|
@@ -218,8 +205,8 @@ summary: Basic NLP stuff for small data sets. Naive Bayes classification and cor
|
|
218
205
|
test_files:
|
219
206
|
- test/alpha_num_test.rb
|
220
207
|
- test/bayes_data_test.rb
|
208
|
+
- test/corpus_test.rb
|
221
209
|
- test/sparse_bayes_test.rb
|
222
|
-
- test/test_helper.rb
|
223
210
|
- test/test_modules/nonrefund.rb
|
224
211
|
- test/test_modules/partrefund.rb
|
225
212
|
- test/test_modules/refund.rb
|
data/test/test_helper.rb
DELETED
File without changes
|