te_rex 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/te_rex/bayes_data.rb +3 -3
- data/lib/te_rex/version.rb +1 -1
- data/test/bayes_data_test.rb +1 -1
- data/test/corpus_test.rb +0 -1
- data/test/trained_bayes_provider_errors_test.rb +7 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bae6e503a7f674b25ede59c7f9ef1c5de54b6a78
|
4
|
+
data.tar.gz: 71e7da095d199a9bc51419a3432912fb461b23c4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 84220db22732d965161f8e677662b4cb9392f02762d4cf89ef027cdf9622fdc10f8eb26fea72e91301151f6fe24cc71a09029b4d31c77a753b5f8005bfff1868
|
7
|
+
data.tar.gz: 3fb8d3e66ff969b967075d13a898b8a12e81c5456996e890cb77fe6b21ed076be59d4749ca3d3715d429a298cc48969b516ccfc42548fd51f700bd638dc9aada
|
data/lib/te_rex/bayes_data.rb
CHANGED
@@ -39,8 +39,8 @@ module TeRex
|
|
39
39
|
# Each word in the string is interned and shows count in the document.
|
40
40
|
def index_frequency(text)
|
41
41
|
cfi = clean_stemmed_filtered_index(text)
|
42
|
-
|
43
|
-
cfi
|
42
|
+
cni = clean_filtered_index(text)
|
43
|
+
cfi.merge(cni)
|
44
44
|
end
|
45
45
|
|
46
46
|
# Return text with datetime and moneyterms replaced, remove cardinal terms (1st, 23rd, 42nd), remove punctuation.
|
@@ -88,7 +88,7 @@ module TeRex
|
|
88
88
|
idx = Hash.new(0)
|
89
89
|
word_array.each do |word|
|
90
90
|
word.downcase!
|
91
|
-
if !TeRex::StopWord::LIST.include?(word)
|
91
|
+
if !TeRex::StopWord::LIST.include?(word) && word.length > 3
|
92
92
|
idx[word.intern] += 1
|
93
93
|
end
|
94
94
|
end
|
data/lib/te_rex/version.rb
CHANGED
data/test/bayes_data_test.rb
CHANGED
data/test/corpus_test.rb
CHANGED
@@ -8,7 +8,7 @@ class TrainedBayesProviderErrorsTest < PryTest::Test
|
|
8
8
|
@@credit_data = TeRex::Train::CREDITDATA
|
9
9
|
@@credit_decline = TeRex::Train::CREDITDECLINE
|
10
10
|
@@credit_service = TeRex::Train::CREDITSERVICE
|
11
|
-
|
11
|
+
#@@unexpected = TeRex::Train::UNEXPECTED
|
12
12
|
#@@unk = TeRex::Train::UNKNOWNERROR
|
13
13
|
|
14
14
|
@@cls = TeRex::Classifier::Bayes.new(
|
@@ -19,7 +19,7 @@ class TrainedBayesProviderErrorsTest < PryTest::Test
|
|
19
19
|
{:tag => "CreditDataError", :msg => "Credit Card data is invalid"},
|
20
20
|
{:tag => "CreditDeclineError", :msg => "Waht? Credit Card declined!"},
|
21
21
|
{:tag => "CreditServiceError", :msg => "External service problem processing"},
|
22
|
-
{:tag => "UnexpectedResponseError", :msg => "Unexpected response"}
|
22
|
+
#{:tag => "UnexpectedResponseError", :msg => "Unexpected response"}
|
23
23
|
#{:tag => "UnknownError", :msg => "Unexpected response"},
|
24
24
|
)
|
25
25
|
@@avail.each {|txt| @@cls.train("AvailabilityError", txt) }
|
@@ -29,7 +29,7 @@ class TrainedBayesProviderErrorsTest < PryTest::Test
|
|
29
29
|
@@credit_data.each {|txt| @@cls.train("CreditDataError", txt) }
|
30
30
|
@@credit_decline.each {|txt| @@cls.train("CreditDeclineError", txt) }
|
31
31
|
@@credit_service.each {|txt| @@cls.train("CreditServiceError", txt) }
|
32
|
-
|
32
|
+
#@@unexpected.each {|txt| @@cls.train("UnexpectedResponseError", txt) }
|
33
33
|
#@@unk.each {|txt| @@cls.train("UnknownError", txt) }
|
34
34
|
|
35
35
|
|
@@ -63,6 +63,10 @@ class TrainedBayesProviderErrorsTest < PryTest::Test
|
|
63
63
|
#assert s_unexpected1 == ["UnexpectedResponseError", "Unexpected response"]
|
64
64
|
end
|
65
65
|
|
66
|
+
test "total word counts correct" do
|
67
|
+
assert @@cls.total_words == 5696
|
68
|
+
end
|
69
|
+
|
66
70
|
|
67
71
|
# test "Training Data Set Test: cancel-cacancelical examples should classify correctly" do
|
68
72
|
#
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: te_rex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joshua Bowles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast-stemmer
|