te_rex 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/format/{corpus/basic_file.rb → basic_file.rb} +2 -2
- data/lib/format/error_file.rb +29 -0
- data/lib/te_rex/corpus.rb +64 -15
- data/lib/te_rex/version.rb +1 -1
- data/lib/te_rex.rb +3 -2
- data/test/bayes_data_test.rb +1 -1
- data/test/corpus_test.rb +57 -0
- data/test/sparse_bayes_test.rb +34 -29
- data/test/trained_bayes_test.rb +34 -29
- metadata +8 -21
- data/test/test_helper.rb +0 -9
- /data/lib/format/{corpus/brown_file.rb → brown_file.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 929e90be95279fcf5093f8d865457509bcc5e692
|
4
|
+
data.tar.gz: 98e1e7ad046d7e3f423934187b0df0546a6500a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad648071a76d79757ecdbc793a31753bdfdf94f6c0e8b462b9f315c8d514e9fdc07c942d2adae49dc20c7d35911554ff48c7f41c6804cf62c1727c16542d897a
|
7
|
+
data.tar.gz: a6397c1ea3b21735813c8f2cd739b889720d08746a5ea1903db1976890ea2c9eaac2e316ea886b2d6305c1e8ef3e02053c560b62dacaa3cfc5f0390094989893
|
@@ -2,7 +2,7 @@ module TeRex
|
|
2
2
|
module Format
|
3
3
|
class BasicFile
|
4
4
|
|
5
|
-
|
5
|
+
attr_reader :sentences, :path
|
6
6
|
|
7
7
|
def initialize(file_path)
|
8
8
|
@path = file_path
|
@@ -22,8 +22,8 @@ module TeRex
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
end
|
25
|
-
|
26
25
|
end
|
26
|
+
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Format
|
3
|
+
require 'csv'
|
4
|
+
class ErrorFile
|
5
|
+
|
6
|
+
attr_reader :sentences, :path
|
7
|
+
|
8
|
+
@@csv_conf = {:headers => true}
|
9
|
+
|
10
|
+
def initialize(file_path)
|
11
|
+
@path = file_path
|
12
|
+
end
|
13
|
+
|
14
|
+
# Each row of csv as Array object, strip it and return
|
15
|
+
def scanner
|
16
|
+
accumulator = []
|
17
|
+
CSV.foreach(@path, @@csv_conf) do |row|
|
18
|
+
next if row.empty?
|
19
|
+
stripped_line = row[0].strip
|
20
|
+
unless stripped_line.nil? || stripped_line.empty?
|
21
|
+
accumulator << stripped_line
|
22
|
+
end
|
23
|
+
end
|
24
|
+
@sentences ||= accumulator
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/te_rex/corpus.rb
CHANGED
@@ -2,39 +2,88 @@ module TeRex
|
|
2
2
|
module Corpus
|
3
3
|
class Body
|
4
4
|
|
5
|
-
|
5
|
+
attr_reader :set, :sample_size, :training, :testing, :format_klass, :category_klass, :total_sentences
|
6
6
|
|
7
|
-
def initialize(glob,
|
7
|
+
def initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass)
|
8
8
|
@glob = glob
|
9
|
-
@
|
9
|
+
@format_klass = format_klass
|
10
|
+
@category_klass = category_klass
|
11
|
+
@partition = partition
|
10
12
|
end
|
11
13
|
|
14
|
+
#@sample_size = (@set.count * 0.75).round
|
12
15
|
def build
|
13
|
-
|
14
|
-
@
|
15
|
-
|
16
|
+
define_set
|
17
|
+
case @partition
|
18
|
+
when /file/
|
19
|
+
file_partition
|
20
|
+
else
|
21
|
+
sentence_partition
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def file_partition
|
26
|
+
@sample_size = (@set.count.to_f * 0.75).round
|
27
|
+
@training = partition_training_by_file
|
28
|
+
@testing = partition_test_by_file
|
29
|
+
count_all
|
30
|
+
end
|
31
|
+
|
32
|
+
def sentence_partition
|
33
|
+
#super_set = build_superset
|
34
|
+
corpus_set = partition_files_for_sentences
|
35
|
+
#@sample_size = (superset.count.to_f * 0.75).round
|
36
|
+
@sample_size = 0.0
|
37
|
+
@training = partition_training_by_sentence(corpus_set)
|
38
|
+
@testing = partition_test_by_sentence(corpus_set)
|
39
|
+
count_all
|
16
40
|
end
|
17
41
|
|
18
|
-
def
|
19
|
-
@
|
20
|
-
@
|
42
|
+
def define_set
|
43
|
+
@set ||= Dir[@glob].map do |file|
|
44
|
+
@format_klass.new(file)
|
21
45
|
end
|
22
|
-
@
|
23
|
-
|
46
|
+
@set
|
47
|
+
end
|
48
|
+
|
49
|
+
def partition_training_by_file
|
50
|
+
@set[0..@sample_size].map do |file|
|
51
|
+
file.scanner
|
52
|
+
end.flatten
|
24
53
|
end
|
25
54
|
|
26
|
-
def
|
27
|
-
@
|
55
|
+
def partition_test_by_file
|
56
|
+
@set[(@sample_size - 1)..-1].map do |file|
|
28
57
|
file.scanner
|
29
58
|
end.flatten
|
30
59
|
end
|
31
60
|
|
32
|
-
def
|
33
|
-
@
|
61
|
+
def partition_files_for_sentences
|
62
|
+
@set.map do |file|
|
34
63
|
file.scanner
|
35
64
|
end.flatten
|
36
65
|
end
|
37
66
|
|
67
|
+
def partition_training_by_sentence(c_set)
|
68
|
+
c_set.sample(c_set.count * 0.75)
|
69
|
+
end
|
70
|
+
|
71
|
+
def partition_test_by_sentence(c_set)
|
72
|
+
c_set.sample(c_set.count * 0.25)
|
73
|
+
end
|
74
|
+
|
75
|
+
def build_superset
|
76
|
+
@set.reduce([]) do |memo,formatter|
|
77
|
+
memo << formatter.sentences
|
78
|
+
end.flatten
|
79
|
+
end
|
80
|
+
|
81
|
+
def count_all
|
82
|
+
counter = 0
|
83
|
+
@set.map{|f| counter += f.sentences.count}
|
84
|
+
@total_sentences = counter
|
85
|
+
end
|
86
|
+
|
38
87
|
end
|
39
88
|
end
|
40
89
|
end
|
data/lib/te_rex/version.rb
CHANGED
data/lib/te_rex.rb
CHANGED
@@ -6,8 +6,9 @@
|
|
6
6
|
#end
|
7
7
|
|
8
8
|
require_relative "format/format"
|
9
|
-
require_relative "format/
|
10
|
-
require_relative "format/
|
9
|
+
require_relative "format/brown_file"
|
10
|
+
require_relative "format/basic_file"
|
11
|
+
require_relative "format/error_file"
|
11
12
|
require_relative "te_rex/stop_word"
|
12
13
|
require_relative "te_rex/alpha_num"
|
13
14
|
require_relative "te_rex/bayes_data"
|
data/test/bayes_data_test.rb
CHANGED
@@ -58,7 +58,7 @@ class BayesDataTest < MicroTest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
test "index frequency has correct counts" do
|
61
|
-
s =
|
61
|
+
s = 'Here is a sentence $141.34 that that $60 that 123.56 I need & & ^ % $c#@ to check the index is correct and okay.'
|
62
62
|
result = TeRex::Classifier::BayesData.index_frequency(s)
|
63
63
|
|
64
64
|
assert result[:moneyterm] == 3
|
data/test/corpus_test.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
#require_relative "../lib/te_rex"
|
2
|
+
class CorpusTest < MicroTest::Test
|
3
|
+
class MockErrorClassifier
|
4
|
+
end
|
5
|
+
|
6
|
+
@@error_corpus = TeRex::Corpus::Body.new(glob: "test/test_modules/*.csv", format_klass: TeRex::Format::ErrorFile, category_klass: MockErrorClassifier)
|
7
|
+
@@error_corpus.build
|
8
|
+
|
9
|
+
test "Corpus has correct data before building" do
|
10
|
+
assert @@error_corpus.format_klass.name == "TeRex::Format::ErrorFile"
|
11
|
+
assert @@error_corpus.category_klass.name == "CorpusTest::MockErrorClassifier"
|
12
|
+
end
|
13
|
+
|
14
|
+
test "total count of sentences is correct" do
|
15
|
+
assert @@error_corpus.total_sentences == 12
|
16
|
+
end
|
17
|
+
|
18
|
+
test "ratio of training to testing is within 70%" do
|
19
|
+
ratio = @@error_corpus.testing.count.to_f / @@error_corpus.training.count.to_f
|
20
|
+
assert (60...80).map{|i| i}.include?((ratio * 100).to_i)
|
21
|
+
end
|
22
|
+
|
23
|
+
test "sentence counts are correct" do
|
24
|
+
assert @@error_corpus.set.count == 3
|
25
|
+
assert @@error_corpus.training.count == 12
|
26
|
+
assert @@error_corpus.testing.count == 8
|
27
|
+
end
|
28
|
+
|
29
|
+
@@sent_corpus = TeRex::Corpus::Body.new(glob: "test/test_modules/*.csv", partition: :sentence, format_klass: TeRex::Format::ErrorFile, category_klass: MockErrorClassifier)
|
30
|
+
@@sent_corpus.build
|
31
|
+
|
32
|
+
test "Corpus has correct data before building" do
|
33
|
+
assert @@sent_corpus.format_klass.name == "TeRex::Format::ErrorFile"
|
34
|
+
assert @@sent_corpus.category_klass.name == "CorpusTest::MockErrorClassifier"
|
35
|
+
end
|
36
|
+
|
37
|
+
test "total count of sentences is correct" do
|
38
|
+
assert @@sent_corpus.total_sentences == 12
|
39
|
+
end
|
40
|
+
|
41
|
+
test "ratio of training to total is about 75%" do
|
42
|
+
ratio = @@sent_corpus.training.count.to_f / @@sent_corpus.total_sentences
|
43
|
+
assert (72...77).map{|i| i}.include?((ratio * 100).to_i)
|
44
|
+
end
|
45
|
+
|
46
|
+
test "ratio of training to total is about 25%" do
|
47
|
+
ratio = @@sent_corpus.testing.count.to_f / @@sent_corpus.total_sentences
|
48
|
+
assert (22...27).map{|i| i}.include?((ratio * 100).to_i)
|
49
|
+
end
|
50
|
+
|
51
|
+
test "sentence counts are correct" do
|
52
|
+
assert @@sent_corpus.set.count == 3
|
53
|
+
assert @@sent_corpus.training.count == 9
|
54
|
+
assert @@sent_corpus.testing.count == 3
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
data/test/sparse_bayes_test.rb
CHANGED
@@ -22,7 +22,12 @@ class SparseBayesTest < MicroTest::Test
|
|
22
22
|
"The cancellation policy will be determined when the rate is validated."
|
23
23
|
]
|
24
24
|
|
25
|
-
@@cls = TeRex::Classifier::Bayes.new(
|
25
|
+
@@cls = TeRex::Classifier::Bayes.new(
|
26
|
+
{:tag => "Refund", :msg => "We are pleased to offer you a refund"},
|
27
|
+
{:tag => "Partrefund", :msg => "You may receive a partial refund"},
|
28
|
+
{:tag => "Nonrefund", :msg => "Much apologies, no refund to you"},
|
29
|
+
{:tag => "Unknown", :msg => "Waht?"}
|
30
|
+
)
|
26
31
|
@@refund.each {|txt| @@cls.train("Refund", txt) }
|
27
32
|
@@partrefund.each {|txt| @@cls.train("Partrefund", txt) }
|
28
33
|
@@norefund.each {|txt| @@cls.train("Nonrefund", txt) }
|
@@ -40,15 +45,15 @@ class SparseBayesTest < MicroTest::Test
|
|
40
45
|
s_non1 = @@cls.classify(s_non)
|
41
46
|
s_unk1= @@cls.classify(s_unk)
|
42
47
|
|
43
|
-
assert s_refund1 == "Refund"
|
44
|
-
assert s_partial1 == "Partrefund"
|
45
|
-
assert s_non1 == "Nonrefund"
|
46
|
-
assert s_unk1 == "Unknown"
|
48
|
+
assert s_refund1 == ["Refund", "We are pleased to offer you a refund"]
|
49
|
+
assert s_partial1 == ["Partrefund", "You may receive a partial refund"]
|
50
|
+
assert s_non1 == ["Nonrefund", "Much apologies, no refund to you"]
|
51
|
+
assert s_unk1 == ["Unknown", "Waht?"]
|
47
52
|
|
48
|
-
assert s_refund1 != "Partrefund"
|
49
|
-
assert s_partial1 != "Refund"
|
50
|
-
assert s_non1 != "Unknown"
|
51
|
-
assert s_unk1 != "Nonrefund"
|
53
|
+
assert s_refund1 != ["Partrefund", "You may receive a partial refund"]
|
54
|
+
assert s_partial1 != ["Refund", "We are pleased to offer you a refund"]
|
55
|
+
assert s_non1 != ["Unknown", "Waht?"]
|
56
|
+
assert s_unk1 != ["Nonrefund", "Much apologies, no refund to you"]
|
52
57
|
end
|
53
58
|
|
54
59
|
|
@@ -64,10 +69,10 @@ class SparseBayesTest < MicroTest::Test
|
|
64
69
|
s33 = @@cls.classify(s3)
|
65
70
|
s44 = @@cls.classify(s4)
|
66
71
|
|
67
|
-
assert s11 == "Unknown"
|
68
|
-
assert s22 == "Unknown"
|
69
|
-
assert s33 == "Unknown"
|
70
|
-
assert s44 == "Unknown"
|
72
|
+
assert s11 == ["Unknown", "Waht?"]
|
73
|
+
assert s22 == ["Unknown", "Waht?"]
|
74
|
+
assert s33 == ["Unknown", "Waht?"]
|
75
|
+
assert s44 == ["Unknown", "Waht?"]
|
71
76
|
end
|
72
77
|
|
73
78
|
test "Sparse Data Set Test: Micro examples should return correct classification" do
|
@@ -82,15 +87,15 @@ class SparseBayesTest < MicroTest::Test
|
|
82
87
|
s33 = @@cls.classify(s3)
|
83
88
|
s44 = @@cls.classify(s4)
|
84
89
|
|
85
|
-
assert s11 == "Refund"
|
86
|
-
assert s22 == "Partrefund"
|
87
|
-
assert s33 == "Nonrefund"
|
88
|
-
assert s44 == "Unknown"
|
90
|
+
assert s11 == ["Refund", "We are pleased to offer you a refund"]
|
91
|
+
assert s22 == ["Partrefund","You may receive a partial refund"]
|
92
|
+
assert s33 == ["Nonrefund", "Much apologies, no refund to you"]
|
93
|
+
assert s44 == ["Unknown", "Waht?"]
|
89
94
|
|
90
|
-
assert s11 != "Partrefund"
|
91
|
-
assert s22 != "Refund"
|
92
|
-
assert s33 != "Unknown"
|
93
|
-
assert s44 != "Nonrefund"
|
95
|
+
assert s11 != ["Partrefund", "You may receive a partial refund"]
|
96
|
+
assert s22 != ["Refund", "We are pleased to offer you a refund"]
|
97
|
+
assert s33 != ["Unknown", "Waht?"]
|
98
|
+
assert s44 != ["Nonrefund", "Much apologies, no refund to you"]
|
94
99
|
end
|
95
100
|
|
96
101
|
|
@@ -106,18 +111,18 @@ test "Sparse Data Set Test: Micro examples should NOT match fake classes" do
|
|
106
111
|
s33 = @@cls.classify(s3)
|
107
112
|
s44 = @@cls.classify(s4)
|
108
113
|
|
109
|
-
assert s11 != "Computers"
|
110
|
-
assert s22 != "Science"
|
111
|
-
assert s33 != "Entertainment"
|
112
|
-
assert s44 != "Sports"
|
114
|
+
assert s11 != ["Computers", "computers yay!"]
|
115
|
+
assert s22 != ["Science", "science yay!"]
|
116
|
+
assert s33 != ["Entertainment", "entertainment yay!"]
|
117
|
+
assert s44 != ["Sports", "sports yay!"]
|
113
118
|
end
|
114
119
|
|
115
120
|
test "Sparse Data Set Test: Category counts are equivalent with number of training data per class" do
|
116
121
|
|
117
|
-
assert @@cls.category_counts[:Refund] == @@refund.count
|
118
|
-
assert @@cls.category_counts[:Partrefund] == @@partrefund.count
|
119
|
-
assert @@cls.category_counts[:Nonrefund] == @@norefund.count
|
120
|
-
assert @@cls.category_counts[:Unknown] == @@unknown.count
|
122
|
+
assert @@cls.category_counts[:Refund] == @@refund.count
|
123
|
+
assert @@cls.category_counts[:Partrefund] == @@partrefund.count
|
124
|
+
assert @@cls.category_counts[:Nonrefund] == @@norefund.count
|
125
|
+
assert @@cls.category_counts[:Unknown] == @@unknown.count
|
121
126
|
|
122
127
|
end
|
123
128
|
|
data/test/trained_bayes_test.rb
CHANGED
@@ -8,7 +8,12 @@ class TrainedBayesTest < MicroTest::Test
|
|
8
8
|
@@norefund = TeRex::Train::NONREFUND
|
9
9
|
@@unknown = TeRex::Train::UNKNOWN
|
10
10
|
|
11
|
-
@@cls = TeRex::Classifier::Bayes.new(
|
11
|
+
@@cls = TeRex::Classifier::Bayes.new(
|
12
|
+
{:tag => "Refund", :msg => "We are pleased to offer you a refund"},
|
13
|
+
{:tag => "Partrefund", :msg => "You may receive a partial refund"},
|
14
|
+
{:tag => "Nonrefund", :msg => "Much apologies, no refund to you"},
|
15
|
+
{:tag => "Unknown", :msg => "Waht?"}
|
16
|
+
)
|
12
17
|
@@refund.each {|txt| @@cls.train("Refund", txt) }
|
13
18
|
@@partrefund.each {|txt| @@cls.train("Partrefund", txt) }
|
14
19
|
@@norefund.each {|txt| @@cls.train("Nonrefund", txt) }
|
@@ -27,18 +32,18 @@ class TrainedBayesTest < MicroTest::Test
|
|
27
32
|
s_unk1= @@cls.classify(s_unk)
|
28
33
|
|
29
34
|
# We are lenient on Partrefund || Refund but we still want to see when it fails
|
30
|
-
assert s_refund1 == "Refund" || "Partrefund"
|
35
|
+
assert s_refund1 == ["Refund", "We are pleased to offer you a refund"] || ["Partrefund", "You may receive a partial refund"]
|
31
36
|
# We are lenient on Refund || Partrefund because of the non-distinctness of the two.
|
32
|
-
assert s_partial1 == "Partrefund" || "Refund"
|
33
|
-
assert s_non1 == "Nonrefund"
|
34
|
-
assert s_unk1 == "Unknown"
|
37
|
+
assert s_partial1 == ["Partrefund", "You may receive a partial refund"] || ["Refund", "We are pleased to offer you a refund"]
|
38
|
+
assert s_non1 == ["Nonrefund", "Much apologies, no refund to you"]
|
39
|
+
assert s_unk1 == ["Unknown", "Waht?"]
|
35
40
|
|
36
41
|
# We are lenient on Partrefund || Refund but we still want to see when it fails
|
37
|
-
assert s_refund1 != "Partrefund"
|
42
|
+
assert s_refund1 != ["Partrefund", "You may receive a partial refund"]
|
38
43
|
# We are lenient on Refund || Partrefund but we still want to see when it fails
|
39
|
-
assert s_partial1 != "Refund"
|
40
|
-
assert s_non1 != "Unknown"
|
41
|
-
assert s_unk1 != "Nonrefund"
|
44
|
+
assert s_partial1 != ["Refund", "We are pleased to offer you a refund"]
|
45
|
+
assert s_non1 != ["Unknown", "Waht?"]
|
46
|
+
assert s_unk1 != ["Nonrefund", "Much apologies, no refund to you"]
|
42
47
|
end
|
43
48
|
|
44
49
|
|
@@ -54,10 +59,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
54
59
|
norefund_s11 = @@cls.classify(norefund_s1)
|
55
60
|
unk_s11 = @@cls.classify(unk_s1)
|
56
61
|
|
57
|
-
assert refund_s11 == "Refund"
|
58
|
-
assert partrefund_s11 == "Partrefund"
|
59
|
-
assert norefund_s11 == "Nonrefund"
|
60
|
-
assert unk_s11 == "Unknown"
|
62
|
+
assert refund_s11 == ["Refund", "We are pleased to offer you a refund"]
|
63
|
+
assert partrefund_s11 == ["Partrefund", "You may receive a partial refund"]
|
64
|
+
assert norefund_s11 == ["Nonrefund", "Much apologies, no refund to you"]
|
65
|
+
assert unk_s11 == ["Unknown", "Waht?"]
|
61
66
|
end
|
62
67
|
|
63
68
|
test "Training Data Set Test: Micro examples should return correct classification" do
|
@@ -72,15 +77,15 @@ class TrainedBayesTest < MicroTest::Test
|
|
72
77
|
s33 = @@cls.classify(s3)
|
73
78
|
s44 = @@cls.classify(s4)
|
74
79
|
|
75
|
-
assert s11 == "Refund"
|
76
|
-
assert s22 == "Partrefund"
|
77
|
-
assert s33 == "Nonrefund"
|
78
|
-
assert s44 == "Unknown"
|
80
|
+
assert s11 == ["Refund", "We are pleased to offer you a refund"]
|
81
|
+
assert s22 == ["Partrefund", "You may receive a partial refund"]
|
82
|
+
assert s33 == ["Nonrefund", "Much apologies, no refund to you"]
|
83
|
+
assert s44 == ["Unknown", "Waht?"]
|
79
84
|
|
80
|
-
assert s11 != "Partrefund"
|
81
|
-
assert s22 != "
|
82
|
-
assert s33 != "Unknown"
|
83
|
-
assert s44 != "
|
85
|
+
assert s11 != ["Partrefund", "You may receive a partial refund"]
|
86
|
+
assert s22 != ["Nonrefund", "Much apologies, no refund to you"]
|
87
|
+
assert s33 != ["Unknown", "Waht?"]
|
88
|
+
assert s44 != ["Refund", "We are pleased to offer you a refund"]
|
84
89
|
end
|
85
90
|
|
86
91
|
test "Training Data Set Test: Micro examples should NOT match fake classes" do
|
@@ -95,10 +100,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
95
100
|
s33 = @@cls.classify(s3)
|
96
101
|
s44 = @@cls.classify(s4)
|
97
102
|
|
98
|
-
assert s11 != "Computers"
|
99
|
-
assert s22 != "Science"
|
100
|
-
assert s33 != "Entertainment"
|
101
|
-
assert s44 != "Sports"
|
103
|
+
assert s11 != ["Computers", "computers yay!"]
|
104
|
+
assert s22 != ["Science", "science yay!"]
|
105
|
+
assert s33 != ["Entertainment", "entertainment yay!"]
|
106
|
+
assert s44 != ["Sports", "sports yay!"]
|
102
107
|
end
|
103
108
|
|
104
109
|
test "Training Data Set Test: Ambiguous examples should return 'Unknown'" do
|
@@ -113,10 +118,10 @@ class TrainedBayesTest < MicroTest::Test
|
|
113
118
|
s33 = @@cls.classify(s3)
|
114
119
|
s44 = @@cls.classify(s4)
|
115
120
|
|
116
|
-
assert s11 == "Unknown"
|
117
|
-
assert s22 == "Unknown"
|
118
|
-
assert s33 == "Unknown"
|
119
|
-
assert s44 == "Unknown"
|
121
|
+
assert s11 == ["Unknown", "Waht?"]
|
122
|
+
assert s22 == ["Unknown", "Waht?"]
|
123
|
+
assert s33 == ["Unknown", "Waht?"]
|
124
|
+
assert s44 == ["Unknown", "Waht?"]
|
120
125
|
end
|
121
126
|
|
122
127
|
test "Training Data Set Test: Category counts are equivalent with number of training data per class" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: te_rex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joshua Bowles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast-stemmer
|
@@ -150,20 +150,6 @@ dependencies:
|
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.4.9.1
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: simplecov
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
153
|
description: Simple text processing for small data sets.
|
168
154
|
email:
|
169
155
|
- jbowayles@gmail.com
|
@@ -171,8 +157,9 @@ executables: []
|
|
171
157
|
extensions: []
|
172
158
|
extra_rdoc_files: []
|
173
159
|
files:
|
174
|
-
- lib/format/
|
175
|
-
- lib/format/
|
160
|
+
- lib/format/basic_file.rb
|
161
|
+
- lib/format/brown_file.rb
|
162
|
+
- lib/format/error_file.rb
|
176
163
|
- lib/format/format.rb
|
177
164
|
- lib/te_rex.rb
|
178
165
|
- lib/te_rex/alpha_num.rb
|
@@ -183,8 +170,8 @@ files:
|
|
183
170
|
- lib/te_rex/version.rb
|
184
171
|
- test/alpha_num_test.rb
|
185
172
|
- test/bayes_data_test.rb
|
173
|
+
- test/corpus_test.rb
|
186
174
|
- test/sparse_bayes_test.rb
|
187
|
-
- test/test_helper.rb
|
188
175
|
- test/test_modules/nonrefund.rb
|
189
176
|
- test/test_modules/partrefund.rb
|
190
177
|
- test/test_modules/refund.rb
|
@@ -210,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
210
197
|
version: '0'
|
211
198
|
requirements: []
|
212
199
|
rubyforge_project:
|
213
|
-
rubygems_version: 2.
|
200
|
+
rubygems_version: 2.4.3
|
214
201
|
signing_key:
|
215
202
|
specification_version: 4
|
216
203
|
summary: Basic NLP stuff for small data sets. Naive Bayes classification and corpora
|
@@ -218,8 +205,8 @@ summary: Basic NLP stuff for small data sets. Naive Bayes classification and cor
|
|
218
205
|
test_files:
|
219
206
|
- test/alpha_num_test.rb
|
220
207
|
- test/bayes_data_test.rb
|
208
|
+
- test/corpus_test.rb
|
221
209
|
- test/sparse_bayes_test.rb
|
222
|
-
- test/test_helper.rb
|
223
210
|
- test/test_modules/nonrefund.rb
|
224
211
|
- test/test_modules/partrefund.rb
|
225
212
|
- test/test_modules/refund.rb
|
data/test/test_helper.rb
DELETED
File without changes
|