te_rex 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/format/corpus/basic_file.rb +29 -0
- data/lib/format/corpus/brown_file.rb +31 -0
- data/lib/format/format.rb +7 -0
- data/lib/te_rex/alpha_num.rb +36 -0
- data/lib/te_rex/bayes.rb +73 -0
- data/lib/te_rex/bayes_data.rb +100 -0
- data/lib/te_rex/corpus.rb +42 -0
- data/lib/te_rex/stop_word.rb +68 -0
- data/lib/te_rex/version.rb +3 -0
- data/lib/te_rex.rb +11 -0
- data/test/alpha_num_test.rb +18 -0
- data/test/bayes_data_test.rb +70 -0
- data/test/sparse_bayes_test.rb +130 -0
- data/test/test_modules/nonrefund.rb +35 -0
- data/test/test_modules/partrefund.rb +30 -0
- data/test/test_modules/refund.rb +32 -0
- data/test/test_modules/unknown.rb +9 -0
- data/test/trained_bayes_test.rb +140 -0
- metadata +231 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0a89389faff81b10c963d6469dfb52e514387813
|
4
|
+
data.tar.gz: 443a437ac01c887945eb14af4cc36aee5257977e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a03cd6e851150d5d9fdfced7942b1b7d9e0d20f47c08d5ac81edbd10337568c148b072a41f40b5615c2610d52535a1c6045874cac4c3baee90c18abe58143647
|
7
|
+
data.tar.gz: 2efe3da36bb2981b519ef4d5310915f7cb0906f1481a27dca3ab041a2847de0354feca7ec60f9404a5f6837ae0561b4f41b7e3454d892971ba39a64cc0030b86
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module TeRex
  module Format
    # Reads a plain-text corpus file and exposes its non-blank lines as
    # whitespace-normalised sentences.
    class BasicFile

      # Memoized result of #scanner; nil until #scanner has been called.
      attr_accessor :sentences

      # file_path - path of the text file to read. The file is only opened
      #             when #scanner is called.
      def initialize(file_path)
        @path = file_path
      end

      # Read the file line by line, skip lines that are blank after stripping,
      # and collapse every run of whitespace inside the remaining lines to a
      # single space. Words are kept as they are (no POS-tag handling here;
      # see BrownFile for that).
      #
      # Returns an Array of Strings. The result is cached in @sentences, so the
      # file is read at most once per instance.
      def scanner
        @sentences ||= File.open(@path) do |file|
          file.each_line.each_with_object([]) do |line, acc|
            next if line.strip.empty?

            acc << line.split(' ').join(' ')
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module TeRex
  module Format
    # Reader for corpus files whose tokens are written as `word/TAG`
    # (Brown-corpus style). Only the part before the first '/' is kept.
    class BrownFile

      # Memoized result of #scanner; nil until #scanner has been called.
      attr_accessor :sentences

      # file_path - path of the tagged text file to read.
      def initialize(file_path)
        @path = file_path
      end

      # For every non-blank line: split on whitespace, drop the POS tag from
      # each token (everything after the first '/'), and re-join the words
      # with single spaces.
      #
      # Returns an Array of Strings, cached in @sentences after the first call.
      def scanner
        @sentences ||= File.open(@path) do |file|
          file.each_line.each_with_object([]) do |line, acc|
            next if line.strip.empty?

            untagged = line.split(' ').map { |token| token.split('/').first }
            acc << untagged.join(' ')
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Helpers for building fake data: an alphabet of letters, symbols and small
# integers, plus randomly formatted date strings.
module TeRex
  module AlphaNum
    @symbols = %w[! @ # $ % ^ & * ( ) - + = < > ? ~]

    # Returns a new Array holding 'a'..'z', 'A'..'Z', the symbol list and the
    # Integers 1..20, in that order.
    def self.gen
      ('a'..'z').to_a + ('A'..'Z').to_a + @symbols + (1..20).to_a
    end

    # Returns a String with a randomly chosen day, month and year (two- or
    # four-digit) rendered in one randomly chosen layout. Day and month are
    # drawn independently, so the result is not guaranteed to be a real
    # calendar date.
    def self.date
      day   = (1..31).to_a.sample
      month = (1..12).to_a.sample
      year2 = (10..99).to_a.sample
      year4 = (1900..2099).to_a.sample

      layouts = [
        "#{day}-#{month}-#{year2}",
        "#{day}/#{month}/#{year2}",
        "#{day}/#{month}/#{year4}",
        "#{day}-#{month}-#{year4}",
        "#{year4}-#{month}-#{day}",
        "#{year4}/#{month}/#{day}",
        "#{year2}/#{month}/#{day}",
        "#{year2}-#{month}-#{day}",
        "#{month}-#{day}-#{year2}",
        "#{month}/#{day}/#{year2}",
        "#{month}/#{day}/#{year4}",
        "#{month}-#{day}-#{year4}",
        "#{year4}-#{day}-#{month}",
        "#{year4}/#{day}/#{month}",
        "#{year2}/#{day}/#{month}",
        "#{year2}-#{day}-#{month}"
      ]
      layouts.sample
    end
  end
end
|
data/lib/te_rex/bayes.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
#
|
2
|
+
# Refactor of Lucas Carlson's classifier https://github.com/cardmagic/classifier (Copyright (c) 2005 lucas@rufy.com)
|
3
|
+
#
|
4
|
+
module TeRex
  module Classifier
    # Naive Bayes text classifier.
    #
    # Category names are normalised with TeRex::Format.category_term and text
    # is turned into a word => count index by BayesData.index_frequency.
    # Per-category word counts are kept in @clasif as
    # { category => { word => count } }.
    class Bayes

      # category_counts - Hash (default 0) of category => number of #train calls.
      # total_words     - sum of all word counts seen by #train.
      attr_accessor :category_counts, :total_words

      # categories - names of the categories the classifier knows about.
      def initialize(*categories)
        @clasif = Hash.new
        categories.each {|cat| @clasif[TeRex::Format.category_term(cat)] = Hash.new}
        @total_words = 0
        @category_counts = Hash.new(0)
      end

      # Add one training document to a category.
      #
      # ctgry - category name; it must normalise to one of the categories
      #         given to #initialize, otherwise the word update below fails
      #         with NoMethodError (there is no word table for it).
      # text  - the training text.
      def train(ctgry, text)
        category = TeRex::Format.category_term(ctgry)
        @category_counts[category] += 1

        BayesData.index_frequency(text).each do |word, count|
          @clasif[category][word] ||= 0
          @clasif[category][word] += count

          @total_words += count
        end
      end

      # Score the text against every category.
      #
      # For each distinct indexed word the log of (count of the word in the
      # category / total word count of the category) is added, with 0.1
      # standing in for words the category has never seen; the log of the
      # category's share of training documents is added last.
      #
      # NOTE(review): a category without any trained words has total == 0, so
      # 0.1 / 0.0 is Infinity and that category gets an infinite score.
      #
      # Returns a Hash of category name (String) => score (Float).
      def classifications(text)
        score = Hash.new
        training_count = @category_counts.values.inject {|x,y| x+y}.to_f
        word_index = nil

        @clasif.each do |category, category_words|
          # The index only depends on the text, so build it once and reuse it
          # for every category (and not at all when there are no categories).
          word_index ||= BayesData.index_frequency(text)

          score[category.to_s] = 0
          total = category_words.values.inject(0) {|sum, element| sum+element}
          word_index.each do |word, _count|
            s = category_words.has_key?(word) ? category_words[word] : 0.1
            score[category.to_s] += Math.log(s/total.to_f)
          end

          k = @category_counts.has_key?(category) ? @category_counts[category] : 0.1
          score[category.to_s] += Math.log(k/training_count)
        end

        score
      end

      # Returns the name (String) of the highest scoring category for text.
      def classify(text)
        (classifications(text).sort_by{|a| -a[1]})[0][0]
      end

      # Returns the known category names as Strings.
      def categories
        @clasif.keys.collect {|c| c.to_s}
      end

      # Describe how well each category is trained.
      #
      # Returns an Array with one entry per category:
      #   [flag, category, {"description" => {...}}]
      # where flag is true when total_words / category_count exceeds
      # total_words / number_of_trained_categories. Both quotients use
      # Integer division before being converted to Float.
      #
      # NOTE(review): raises ZeroDivisionError when nothing has been trained
      # yet, or when a registered category has never been trained.
      def training_description
        max_threshold = (@total_words/self.category_counts.keys.count).to_f
        tmp = []
        @clasif.each_pair do |term,val|
          cc = self.category_counts[term]
          train_ratio = (@total_words/cc).to_f
          tmp << [(train_ratio > max_threshold), term, "description" => {"training_ratio" => "#{train_ratio}", "threshold" => "#{max_threshold}", "category_counts" => "#{cc}", "total_words" => "#{@total_words}"}]
        end
        tmp
      end

      # Returns the #training_description entries flagged as under-trained.
      # The result is an Array (possibly empty), not a boolean.
      def under_trained?
        training_description.select {|ut| ut.first == true}
      end

    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'fast_stemmer'
|
2
|
+
|
3
|
+
module TeRex
  module Classifier
    # Text clean-up and word-frequency indexing helpers used by the Bayes
    # classifier. All methods are class-level.
    class BayesData
      class << self

        # Delete the listed punctuation characters from s. Characters not in
        # the list (for example %, -, ' and ~) are left alone.
        def remove_punct(s)
          s.gsub(/[,?.!;:"@\#\$\^&*()_=+\[\]\\|<>\/`{}]/, '')
        end

        # Delete ordinal-looking terms (1st, 23rd, 42nd). The pattern is one or
        # more digits followed by any two word characters, so bare numbers of
        # three or more digits are deleted as well.
        def remove_cardinal(s)
          s.gsub(/\d+\w{2}/, '')
        end

        # Replace date/time looking terms (09MAR04, 02-23-14, 2014/03/05, 4:00)
        # and digits at the start of a line with the token 'datetime'.
        def date_time(s)
          s.gsub(/(^\d+)|(\s\d+(AM|PM))|(\d{2}\w{3}\d{2})|(\d{2}\:\d{2})|(\d{2,4}\-\d{2,4}-\d{2,4})|(\d{1,3}\/\d{2,4}\/\d{2,4})|(\d+\:\d+)/, 'datetime')
        end

        # Replace money amounts ($60, 120.00, $423.89) with the token 'moneyterm'.
        def money_term(s)
          s.gsub(/(\$\d+\.\d+)|(\$\d+)|(\d+\.\d+)/, 'moneyterm')
        end

        # Build a Hash of word (Symbol) => number of occurrences in text.
        # The stemmed index and the unstemmed index are merged; when a key is
        # present in both, the count from the unstemmed index is kept.
        def index_frequency(text)
          clean_stemmed_filtered_index(text).merge(clean_filtered_index(text))
        end

        # Normalise text in four passes, in this order: date/time replacement,
        # money replacement, punctuation removal, ordinal removal.
        # A blanket `gsub(/[^\w\s]/, "")` was used at one point but dropped
        # because it removed punctuation needed to tell some classes apart.
        def clean(text)
          remove_cardinal(remove_punct(money_term(date_time(text))))
        end

        # Frequency index of the cleaned text: downcased, stop words removed,
        # words stemmed.
        def clean_stemmed_filtered_index(text)
          stemmed_filtered_index(clean(text).split)
        end

        # Frequency index of the cleaned text: downcased, stop words removed.
        def clean_filtered_index(text)
          filtered_index(clean(text).split)
        end

        # Frequency index of the cleaned text with no downcasing, stemming or
        # stop-word filtering.
        def clean_naive_index(text)
          naive_index(clean(text).split)
        end

        private

        # Count stems of the downcased words that are not in the stop list.
        def stemmed_filtered_index(word_array)
          word_array.each_with_object(Hash.new(0)) do |word, idx|
            lowered = word.downcase
            next if TeRex::StopWord::LIST.include?(lowered)

            idx[lowered.stem.intern] += 1
          end
        end

        # Count the downcased words that are not in the stop list.
        def filtered_index(word_array)
          word_array.each_with_object(Hash.new(0)) do |word, idx|
            lowered = word.downcase
            next if TeRex::StopWord::LIST.include?(lowered)

            idx[lowered.intern] += 1
          end
        end

        # Count every word exactly as given.
        def naive_index(word_array)
          word_array.each_with_object(Hash.new(0)) do |word, idx|
            idx[word.intern] += 1
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module TeRex
  module Corpus
    # A corpus built from the files matching a glob, split into a training
    # partition (the first 75% of the files) and a testing partition (the
    # remaining files).
    class Body

      # files       - Array of @klass instances, one per matched file.
      # sample_size - number of files in the training partition.
      # training    - sentences of the training partition (set by #build).
      # testing     - sentences of the testing partition (set by #build).
      attr_accessor :files, :sample_size, :training, :testing

      # glob  - file glob handed to Dir[].
      # klass - class instantiated with each matched path; its instances must
      #         respond to #scanner.
      def initialize(glob, klass)
        @glob = glob
        @klass = klass
      end

      # Load the files and fill both partitions. The results are exposed
      # through #training and #testing (the older @training_sentences /
      # @testing_sentences variables are still assigned as well).
      #
      # Returns the testing sentences.
      def build
        get_files
        @training = @training_sentences = partition_train
        @testing = @testing_sentences = partition_test
      end

      # Wrap every file matched by the glob in @klass (memoized) and compute
      # the training sample size as 75% of the file count, rounded.
      #
      # Returns the Array of file objects.
      def get_files
        @files ||= Dir[@glob].map { |file| @klass.new(file) }
        @sample_size = (@files.count * 0.75).round
        @files
      end

      # Sentences of the first @sample_size files. The range excludes its end
      # so that exactly @sample_size files are used.
      def partition_train
        @files[0...@sample_size].map { |file| file.scanner }.flatten
      end

      # Sentences of the files after the training partition. Starting at
      # @sample_size keeps the two partitions disjoint, and yields an empty
      # Array when there are no files left.
      def partition_test
        @files[@sample_size..-1].map { |file| file.scanner }.flatten
      end

    end
  end
end
|
41
|
+
|
42
|
+
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module TeRex
  # Words ignored by the filtered word indexes: common English function
  # words, month and weekday names with some abbreviations, and am/pm.
  class StopWord
    # Lower-case stop words. Each word is listed once and the list is frozen
    # so it cannot be changed by accident at runtime.
    LIST = %w[
      a am an and are as at be by do for in into it it's its of so than that
      that's the what what's where which
      january february march april may june july august september october
      november december
      jan feb mar apr aug sept nov dec
      monday mon tuesday tue wednesday wed thursday thur friday fri saturday
      sat sunday sun
      pm
    ].freeze
  end
end
|
data/lib/te_rex.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require_relative "format/format"
|
2
|
+
require_relative "format/corpus/brown_file"
|
3
|
+
require_relative "format/corpus/basic_file"
|
4
|
+
require_relative "te_rex/stop_word"
|
5
|
+
require_relative "te_rex/alpha_num"
|
6
|
+
require_relative "te_rex/bayes_data"
|
7
|
+
require_relative "te_rex/bayes"
|
8
|
+
require_relative "te_rex/corpus"
|
9
|
+
|
10
|
+
module TeRex
|
11
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative "../lib/te_rex"
|
2
|
+
# Checks that TeRex::AlphaNum.gen contains every expected character class.
class AlphaNumTest < MicroTest::Test

  test "generates array of lowercase roman characters" do
    alphabet = TeRex::AlphaNum.gen
    ('a'..'z').each do |letter|
      assert alphabet.include?(letter)
    end
  end

  test "generates array of uppercase roman characters" do
    alphabet = TeRex::AlphaNum.gen
    ('A'..'Z').each do |letter|
      assert alphabet.include?(letter)
    end
  end

  test "generates array of integers 1-20" do
    alphabet = TeRex::AlphaNum.gen
    (1..20).each do |number|
      assert alphabet.include?(number)
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative "../lib/te_rex"
|
2
|
+
# Exercises the text clean-up helpers and the frequency index of
# TeRex::Classifier::BayesData. Each table pairs an input with the output
# the helper under test is expected to produce.
class BayesDataTest < MicroTest::Test

  test "punctuation is removed (except %)" do
    [
      ["This * punctuation se%ntence ).!", "This punctuation se%ntence "],
      ["Much $ in @ this } [ sentence too?", "Much in this sentence too"],
      ["And I$ have c#des in |his one with 100% refund too@>.", "And I have cdes in his one with 100% refund too"]
    ].each do |raw, expected|
      assert TeRex::Classifier::BayesData.remove_punct(raw) == expected
    end
  end

  test "datetime is removed and replaced" do
    [
      ["This $140 will be paid on 09/14/2014", "This $140 will be paid on datetime"],
      ["I get $20.00 on 2014-05-21 and on 09MAR04", "I get $20.00 on datetime and on datetime"],
      ["I'll pay you $60.21 on 06-20-2014", "I'll pay you $60.21 on datetime"]
    ].each do |raw, expected|
      assert TeRex::Classifier::BayesData.date_time(raw) == expected
    end
  end

  test "moneyterm is removed and replaced" do
    [
      ["$140 will be paid on 09/14/2014 with $60", "moneyterm will be paid on 09/14/2014 with moneyterm"],
      ["I get $20.00 on 2014-05-21 and on 09MAR04", "I get moneyterm on 2014-05-21 and on 09MAR04"],
      ["You'll make $1234.73 on 06-20-2014", "You'll make moneyterm on 06-20-2014"]
    ].each do |raw, expected|
      assert TeRex::Classifier::BayesData.money_term(raw) == expected
    end
  end

  test "cleaned text does what we want" do
    [
      ["$140 will be paid on 09/14/2014 with $60", "moneyterm will be paid on datetime with moneyterm"],
      ["I get $20.00 on 2014-05-21 and on 09MAR04 with %49 and &*%^)", "I get moneyterm on datetime and on datetime with %49 and %"],
      ["And I$ have c#des in |his one wi%th 100% refund too@>.", "And I have cdes in his one wi%th % refund too"]
    ].each do |raw, expected|
      assert TeRex::Classifier::BayesData.clean(raw) == expected
    end
  end

  test "index frequency has correct counts" do
    sentence = "Here is a sentence $141.34 that that $60 that 123.56 I need & & ^ % $c#@ to check the index is correct and okay."
    index = TeRex::Classifier::BayesData.index_frequency(sentence)

    # Both the stemmed (:sentenc) and the unstemmed (:sentence) keys are
    # expected in the merged index.
    assert index[:moneyterm] == 3
    assert index[:sentenc] == 1
    assert index[:sentence] == 1
  end

end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require_relative "../lib/te_rex"
|
2
|
+
# Trains a Bayes classifier on a deliberately tiny data set and checks how it
# classifies exact, partial and unrelated inputs.
#
# The fixtures and the trained classifier are class-scoped constants (rather
# than @@ class variables, which are shared with every subclass). They are
# built once, when the class body is evaluated.
class SparseBayesTest < MicroTest::Test
  REFUND = [
    "Free cancellation before 1201 AM on 9/17/14! If you cancel or change your reservation after 1201 AM on 9/17/14 the hotel will charge you for the total cost of your reservation.",
    "ALL RESERVATIONS MUST BE CANCELLED 24 HOURS PRIOR TO HOST TIME UNLESS DEPOSIT REQUIRED IF THIS RESERVATION HAS BEEN MADE ELECTRONICALLY PLEASE CANCEL IT ELECTRONICALLY TO AVOID CONFUSION AND A NO SHOW BILL. POLICY SUBJECT TO CHANGE. .",
    "Free cancellation before 800 PM on 9/20/14! If you cancel or change your reservation after 800 PM on 9/20/14 the hotel will charge you $158. If you cancel or change your reservation after 800 PM on 9/21/14 the hotel will charge you for the total cost of your reservation."
  ]

  PARTREFUND = [
    "If you cancel or change your reservation before 1201 AM on 10/21/14 the hotel will charge you $57. If you cancel or change your reservation after 1201 AM on 10/21/14 the hotel will charge you $335. If you cancel or change your reservation after 1201 AM on 10/24/14 the hotel will charge you for the total cost of your reservation.",
    "If you cancel or change your reservation before 1201 AM on 9/10/14 the hotel will charge you $225. If you cancel or change your reservation after 1201 AM on 9/10/14 the hotel will charge you for the total cost of your reservation.",
    "Cancellations or changes made before 4:00 PM Eastern Time on Sep 11, 2014 are subject to a 1 Night Room & Tax penalty. Cancellations or changes made after 4:00 PM Eastern Time on Sep 11, 2014 are subject to a 1 Night Room & Tax penalty. The property makes no refunds for no shows or early checkouts."
  ]

  NOREFUND = [
    "This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge.",
    "This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
    "For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount."
  ]

  UNKNOWN = [
    "The cancellation policy will be determined when the rate is validated."
  ]

  CLASSIFIER = TeRex::Classifier::Bayes.new("Refund", "Partrefund", "Nonrefund", "Unknown")
  REFUND.each {|txt| CLASSIFIER.train("Refund", txt) }
  PARTREFUND.each {|txt| CLASSIFIER.train("Partrefund", txt) }
  NOREFUND.each {|txt| CLASSIFIER.train("Nonrefund", txt) }
  UNKNOWN.each {|txt| CLASSIFIER.train("Unknown", txt) }

  test "Sparse Data Set Test: Random exact match sould classify correctly" do

    s_refund = REFUND.sample
    s_partial = PARTREFUND.sample
    s_non = NOREFUND.sample
    s_unk = UNKNOWN.sample

    s_refund1 = CLASSIFIER.classify(s_refund)
    s_partial1 = CLASSIFIER.classify(s_partial)
    s_non1 = CLASSIFIER.classify(s_non)
    s_unk1 = CLASSIFIER.classify(s_unk)

    assert s_refund1 == "Refund"
    assert s_partial1 == "Partrefund"
    assert s_non1 == "Nonrefund"
    assert s_unk1 == "Unknown"

    assert s_refund1 != "Partrefund"
    assert s_partial1 != "Refund"
    assert s_non1 != "Unknown"
    assert s_unk1 != "Nonrefund"
  end


  test "Sparse Data Set Test: Non-canonical examples should return unknown" do

    s1 = "You will get a full refund and free cancellation"
    s2 = "You will get a partial refund and be charged"
    s3 = "You will get non refund"
    s4 = "You will get a nonsense am I writing here."

    s11 = CLASSIFIER.classify(s1)
    s22 = CLASSIFIER.classify(s2)
    s33 = CLASSIFIER.classify(s3)
    s44 = CLASSIFIER.classify(s4)

    assert s11 == "Unknown"
    assert s22 == "Unknown"
    assert s33 == "Unknown"
    assert s44 == "Unknown"
  end

  test "Sparse Data Set Test: Micro examples should return correct classification" do

    s1 = "Free cancellation before"
    s2 = "If you cancel or change your reservation before"
    s3 = "non-refund"
    s4 = "policy rate validated."

    s11 = CLASSIFIER.classify(s1)
    s22 = CLASSIFIER.classify(s2)
    s33 = CLASSIFIER.classify(s3)
    s44 = CLASSIFIER.classify(s4)

    assert s11 == "Refund"
    assert s22 == "Partrefund"
    assert s33 == "Nonrefund"
    assert s44 == "Unknown"

    assert s11 != "Partrefund"
    assert s22 != "Refund"
    assert s33 != "Unknown"
    assert s44 != "Nonrefund"
  end


  test "Sparse Data Set Test: Micro examples should NOT match fake classes" do

    s1 = "free cancellation"
    s2 = "partial refund"
    s3 = "no refund"
    s4 = "policy rate validated."

    s11 = CLASSIFIER.classify(s1)
    s22 = CLASSIFIER.classify(s2)
    s33 = CLASSIFIER.classify(s3)
    s44 = CLASSIFIER.classify(s4)

    assert s11 != "Computers"
    assert s22 != "Science"
    assert s33 != "Entertainment"
    assert s44 != "Sports"
  end

  test "Sparse Data Set Test: Category counts are equivalent with number of training data per class" do

    assert CLASSIFIER.category_counts[:Refund] == REFUND.count
    assert CLASSIFIER.category_counts[:Partrefund] == PARTREFUND.count
    assert CLASSIFIER.category_counts[:Nonrefund] == NOREFUND.count
    assert CLASSIFIER.category_counts[:Unknown] == UNKNOWN.count

  end

  test "Sparse Data Set Test: All SPARSE Training classes should be undertrained... " do
    res = CLASSIFIER.under_trained?
    assert res.count == 4
  end

end
|
130
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Train
|
3
|
+
NONREFUND = [
|
4
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge.",
|
5
|
+
"This reservation is non-refundable. Cancellations made at any time are subject to a 100% charge.",
|
6
|
+
"This reservation is non-refundable.",
|
7
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge.",
|
8
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
|
9
|
+
"This reservation is non-refundable. Cancellations made at any time are subject to a 100% charge.",
|
10
|
+
"This reservation is non-refundable.",
|
11
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge.",
|
12
|
+
"This reservation is non-refundable. Cancellations made at any time are subject to a 100% charge.",
|
13
|
+
"This reservation is non-refundable.",
|
14
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you choose to change or cancel this booking you will not be refunded any of the payment.",
|
15
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to cancel this booking you will not be refunded any of the payment.",
|
16
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any payment.",
|
17
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
|
18
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
|
19
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
|
20
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
21
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
22
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
23
|
+
"This rate is non-refundable and cannot be changed or cancelled - if you do choose to change or cancel this booking you will not be refunded any of the payment.",
|
24
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
25
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
26
|
+
"For the room type and rate that you've selected you are not allowed to change or cancel your reservation. If you cancel your room you will still be charged for the full reservation amount.",
|
27
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge. Refunds are not available for early check-out.",
|
28
|
+
"Non-refundable. Cancellations or changes made at any time are subject to a 100% charge. We are sorry but refunds are not available for early check-out.",
|
29
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge. We are sorry but refunds are not available for early check-out.",
|
30
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge. We are sorry but refunds are not available for early check-out.",
|
31
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge. We are sorry but refunds are not available for early check-out.",
|
32
|
+
"This reservation is non-refundable. Cancellations or changes made at any time are subject to a 100% charge. We are sorry but refunds are not available for early check-out."
|
33
|
+
]
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Train
|
3
|
+
PARTREFUND = [
|
4
|
+
"If you cancel or change your reservation before 1201 AM on 10/21/14 the hotel will charge you $57. If you cancel or change your reservation after 1201 AM on 10/21/14 the hotel will charge you $335. If you cancel or change your reservation after 1201 AM on 10/24/14 the hotel will charge you for the total cost of your reservation.",
|
5
|
+
"If you cancel or change your reservation before 4:00 PM on 9/22/14, the hotel will charge you $388. If you cancel or change your reservation after 4:00 PM on 9/22/14, the hotel will charge you for the total cost of your reservation. ",
|
6
|
+
"If you cancel or change your reservation before 1201 AM on 9/10/14 the hotel will charge you $225. If you cancel or change your reservation after 1201 AM on 9/10/14 the hotel will charge you for the total cost of your reservation.",
|
7
|
+
"Cancellations or changes made before 4:00 PM Eastern Time on Sep 11, 2014 are subject to a 1 Night Room & Tax penalty. Cancellations or changes made after 4:00 PM Eastern Time on Sep 11, 2014 are subject to a 1 Night Room & Tax penalty. The property makes no refunds for no shows or early checkouts.",
|
8
|
+
"If you cancel or change your reservation before 3:00 PM on 07/14/14, the hotel will charge you $189. If you cancel or change your reservation after 3:00 PM on 07/14/14, the hotel will charge you $225. If you cancel or change your reservation after 3:00 PM on 07/17/14 the hotel will charge you for the total cost of your reservation. ",
|
9
|
+
"If you cancel or change your reservation before 3:00 PM on 7\\/31\\/14, the hotel will charge you $305. If you cancel or change your reservation after 3:00 PM on 7\\/31\\/14, the hotel will charge you for the total cost of your reservation. ",
|
10
|
+
"If you cancel or change your reservation before 4:00 PM on 9/22/14, the hotel will charge you $245. If you cancel or change your reservation after 4:00 PM on 9/22/14, the hotel will charge you for the total cost of your reservation. ",
|
11
|
+
"If you cancel or change your reservation before 4:00 PM on 9/24/14, the hotel will charge you $233. If you cancel or change your reservation after 4:00 PM on 9/24/14, the hotel will charge you for the total cost of your reservation. ",
|
12
|
+
"If you cancel or change your reservation before 12:00 PM on 1/05/15, the hotel will charge you $86. If you cancel or change your reservation after 12:00 PM on 1/05/15, the hotel will charge you $625. If you cancel or change your reservation after 12:00 PM on 1/07/15 the hotel will charge you for the total cost of your reservation. ",
|
13
|
+
"If you cancel or change your reservation before 4:00 PM on 9/21/14, the hotel will charge you $215. If you cancel or change your reservation after 4:00 PM on 9/21/14, the hotel will charge you for the total cost of your reservation. ",
|
14
|
+
"If you cancel or change your reservation before 6:00 PM on 04/17/14, the hotel will charge you $187. If you cancel or change your reservation after 6:00 PM on 04/17/14, the hotel will charge you for the total cost of your reservation. ",
|
15
|
+
"If you cancel or change your reservation before 16:00 PM on 3/31/14, the hotel will charge you $89. If you cancel or change your reservation after 16:00 PM on 3/31/14,, the hotel will charge you for the total cost of your reservation. ",
|
16
|
+
"If you cancel or change your reservation before 6:00 AM on 12/07/14, the hotel will charge you $5675. If you cancel or change your reservation after 6:00 AM on 12/07/14, the hotel will charge you for the total cost of your reservation. ",
|
17
|
+
"If you cancel or change your reservation before 3:00 PM on 07/14/14, the hotel will charge you $189. If you cancel or change your reservation after 3:00 PM on 07/14/14, the hotel will charge you $225. If you cancel or change your reservation after 3:00 PM on 07/17/14 the hotel will charge you for the total cost of your reservation. ",
|
18
|
+
"If you cancel or change your reservation before 7:00 AM on 06/09/14, the hotel will charge you $509. If you cancel or change your reservation after 7:00 AM on 06/09/14, the hotel will charge you for the total cost of your reservation. ",
|
19
|
+
"If you cancel or change your reservation before 9:00 PM on 5/23/14, the hotel will charge you $1018. If you cancel or change your reservation after 9:00 PM on 5/23/14, the hotel will charge you for the total cost of your reservation. ",
|
20
|
+
"If you cancel or change your reservation before 10:00 PM on 5/31/14, the hotel will charge you $215. If you cancel or change your reservation after 10:00 PM on 5/31/14, the hotel will charge you $425. If you cancel or change your reservation after 10:00 PM on 6/03/14 the hotel will charge you for the total cost of your reservation. ",
|
21
|
+
"If you cancel or change your reservation before 8:00 AM on 12/17/14, the hotel will charge you $15. If you cancel or change your reservation after 8:00 AM on 12/17/14, the hotel will charge you for the total cost of your reservation. ",
|
22
|
+
"If you cancel or change your reservation before 12:00 PM on 1/05/15, the hotel will charge you $86. If you cancel or change your reservation after 12:00 PM on 1/05/15, the hotel will charge you $625. If you cancel or change your reservation after 12:00 PM on 1/07/15 the hotel will charge you for the total cost of your reservation. ",
|
23
|
+
"If you cancel or change your reservation before 2:00 PM on 11/14/14, the hotel will charge you $57. If you cancel or change your reservation after 2:00 PM on 11/14/14, the hotel will charge you for the total cost of your reservation. ",
|
24
|
+
"If you cancel or change your reservation before 2:00 PM on 11/14/14, the hotel will charge you $57. If you cancel or change your reservation after 2:00 PM on 11/14/14, the hotel will charge you for the total cost of your reservation. ",
|
25
|
+
"If you cancel or change your reservation before 12:00 PM on 1/05/15, the hotel will charge you $86. If you cancel or change your reservation after 12:00 PM on 1/05/15, the hotel will charge you $625. If you cancel or change your reservation after 12:00 PM on 1/07/15 the hotel will charge you for the total cost of your reservation. ",
|
26
|
+
"If you cancel or change your reservation before 4:00 PM on 2/21/15, the hotel will charge you $115. If you cancel or change your reservation after 4:00 PM on 2/21/15, the hotel will charge you for the total cost of your reservation. ",
|
27
|
+
"If you cancel or change your reservation before 4:00 PM on 2/14/15, the hotel will charge you $318. If you cancel or change your reservation after 4:00 PM on 2/14/15, the hotel will charge you for the total cost of your reservation. ",
|
28
|
+
]
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Train
|
3
|
+
REFUND = [
|
4
|
+
"Free cancellation before 1201 AM on 9/17/14! If you cancel or change your reservation after 1201 AM on 9/17/14 the hotel will charge you for the total cost of your reservation. ",
|
5
|
+
"Free cancellation before 12:01 AM on 10/29/14! If you cancel or change your reservation after 12:01 AM on 10/29/14, the hotel will charge you $194. If you cancel or change your reservation after 12:01 AM on 10/30/14, the hotel will charge you for the total cost of your reservation.",
|
6
|
+
"If you cancel or change your reservation after 12:01 AM on 9/12/14, the hotel will charge you for the total cost of your reservation. ",
|
7
|
+
"Cancellations or changes made within 1 day prior to 4:00 PM local hotel time on the day of arrival are subject to a $142.93 charge. Cancellations or changes made after 4:00 PM local hotel time on the day of arrival are subject to a 100% charge. We are sorry but refunds are not available for early check-out.",
|
8
|
+
"We understand that sometimes your travel plans change. We do not charge a change or cancel fee. However, this property (Courtyard by Marriott Traverse City) imposes the following penalty to its customers that we are required to pass on: Cancellations or changes made after 5:00 PM ((GMT-05:00) Eastern Time (US & Canada)) on Jul 9, 2014 are subject to a 1 Night Room & Tax penalty. The property makes no refunds for no shows or early checkouts.",
|
9
|
+
"Free cancellation before 800 PM on 9/20/14! If you cancel or change your reservation after 800 PM on 9/20/14 the hotel will charge you $158. If you cancel or change your reservation after 800 PM on 9/21/14 the hotel will charge you for the total cost of your reservation. ",
|
10
|
+
"Any cancellation received within 2 days prior to arrival date will incur the first night charge. Failure to arrive at your hotel will be treated as a No-Show and will incur the first night charge (Hotel policy).",
|
11
|
+
"For the room type you've selected you can cancel your reservation for a full refund up until noon on Friday September 12th (local hotel time). If you decide to cancel your reservation anytime between noon on Friday September 12th and noon on Saturday September 13th (local hotel time) the hotel requires payment for the first night's stay. You will be charged for the first night's stay including taxes and fees. Any remaining amount will be refunded to you. Refunds or cancellations are not available after noon local hotel time on your day of arrival (Saturday September 13th). ",
|
12
|
+
"THIS PROPERTY REQUIRES A NOTIFICATION OF CANCELLATION BY 4PM HOTEL TIME 1 DAY PRIOR TO ARRIVAL TO AVOID A PENALTY.",
|
13
|
+
"You can cancel free of charge up until the cancellation window. Cancellations or changes made after 4:00 PM Eastern Time on Sep 12, 2014 are subject to a 1 Night Room & Tax penalty. The property makes no refunds for no shows or early checkouts.",
|
14
|
+
"For the room type you've selected you can cancel your reservation for a full refund up until noon on Monday September 15th (local hotel time). If you decide to cancel your reservation anytime between noon on Monday September 15th and noon on Wednesday September 17th (local hotel time) the hotel requires payment for the first night's stay. You will be charged for the first night's stay including taxes and fees. Any remaining amount will be refunded to you. Refunds or cancellations are not available after noon local hotel time on your day of arrival (Wednesday September 17th).",
|
15
|
+
"-08FEB04 - END - CANCEL BY 0 DAYS PRIOR TO ARRIVAL, LOCAL HOTEL TIME TO AVOID A CANCELLATION PENALTY CANCEL POLICIES AND EARLY DEPARTURE FEES VARY BY HOTEL. SINCE A HOTEL CAN SET A CANCELLATION POLICY OF UP TO 30 DAYS IN ADVANCE PLEASE REVIEW POLICY PRIOR TO BOOKING TO AVOID POSSIBLE CHARGE.",
|
16
|
+
"IF CANCELLATION IS NECESSARY, TO AVOID BILLING YOU MUST RECEIVE A CANCEL NUMBER BY 6 PM ON THE DATE OF ARRIVAL. CANCEL TIMES AND DATES MAY VARY. CONTACT THE RESERVED MOTEL 6 FOR SPECIFIC POLICIES THAT MAY AFFECT YOUR RESERVATION. . -EARLY CHECKOUT POLICY -CONTACT LOCATION FOR MORE INFORMATION. .",
|
17
|
+
"-1800 HOTEL TIME, DAY OF ARRIVAL TO AVOID BILLING OF 1 NIGHT ROOM AND TAX OR FORFEITURE OF DEPOSIT",
|
18
|
+
"RESERVATIONS MUST BE CANCELLED 1DAY/24HOURS PRIOR TO ARRIVAL TO AVOID A PENALTY OF ONE NIGHT ROOM AND TAX. INDIVIDUAL PLANS MAY HAVE VARING CANCEL POLICIES. EARLY DEPARTURE FEE 1NIGHTS STAY. .",
|
19
|
+
"06NOV03 - END - CANCEL BY 1 DAYS PRIOR TO 1600 HOURS ON DAY OF ARRIVAL, LOCAL HOTEL TIME TO AVOID A CANCELLATION PENALTY CANCELLATION POLICY TEXT - DUE TO SEASONAL VARIATIONS, MOST ACCURATE CANCEL POLICY RETURNED UPON BOOKINGS. INTERNATIONAL MAXIMUM 3 ROOMS PER RESERVATION. MAXIMUM 3 PERSONS PER ROOM ONLY IF A ROLLAWAY IS AVAILABLE. DOMESTIC MAXIMUM 5 ROOMS PER RESERVATIONS. MAXIMUM 5 PERSONS PER ROOM ONLY IF ROLLAWAY IS AVAILABLE.",
|
20
|
+
"RESERVATIONS MUST BE CANCELED *24 HOURS* PRIOR TO ARRIVAL DATE TO AVOID A PENALTY OF ONE NIGHT ROOM AND TAX CHARGE INDIVIDUAL PLANS MAY VARY SEE PLAN DISPLAY FOR MORE INFORMATION **100USD EARLY DEPARTURE FEE** .",
|
21
|
+
"ALL RESERVATIONS MUST BE CANCELLED 24 HOURS PRIOR TO HOST TIME UNLESS DEPOSIT REQUIRED IF THIS RESERVATION HAS BEEN MADE ELECTRONICALLY PLEASE CANCEL IT ELECTRONICALLY TO AVOID CONFUSION AND A NO SHOW BILL. POLICY SUBJECT TO CHANGE. .",
|
22
|
+
"ALL RESERVATIONS MUST BE CANCELLED 48 HOURS PRIOR TO ARRIVAL HOST TIME IF THIS RESERVATION HAS BEEN MADE ELECTRONICALLY PLEASE CANCEL IT ELECTRONICALLY TO AVOID CONFUSION AND A NO SHOW BILL POLICY SUBJECT TO CHANGE .",
|
23
|
+
"IF CANCELLATION IS NECESSARY, TO AVOID BILLING YOU MUST RECEIVE A CANCEL NUMBER BY 6 PM ON THE DATE OF ARRIVAL. CANCEL TIMES AND DATES MAY VARY. CONTACT THE RESERVED MOTEL 6 FOR SPECIFIC POLICIES THAT MAY AFFECT YOUR RESERVATION. . -EARLY CHECKOUT POLICY -CONTACT LOCATION FOR MORE INFORMATION. .o",
|
24
|
+
"RESERVATIONS MUST BE CANCELED *72 HOURS* PRIOR TO ARRIVAL LOCAL HOTEL TIME TO AVOID A PENALTY OF ONE NIGHT ROOM AND TAX CHARGE INDIVIDAUL PLANS MAY VARY SEE PLAN DISPLAY FOR MORE INFORMATION **75USD EARLY DEPARTURE FEE**",
|
25
|
+
"- 12 NOON HOTEL TIME DAY OF ARRIVAL TO AVOID BILLING OF 1NT ROOM AND TAX OR FORFEITURE OF DEPOSIT",
|
26
|
+
"RESERVATIONS MUST BE CANCELLED BY 1600/4PM DAY OF ARRIVAL LOCAL HOTEL TIME TO AVOID A PENALTY OF ONE NIGHT ROOM AND TAX CHARGE. NOTE- INDIVIDUAL PLANS MAY VARY. PLEASE READ INDIVIDUAL DISPLAY FOR MORE INFORMATION. **50.00USD EARLY DEPARTURE FEE** .",
|
27
|
+
"- RESERVATIONS BOOKED 8 DAYS OR MORE PRIOR TO ARRIVAL MUST BE CANCELLED 7 DAYS PRIOR TO ARRIVAL TO RECEIVE REFUND - RESERVATIONS BOOKED WITHIN 7 DAYS OF ARRIVAL FORFEIT DEPOSIT OF 3 NIGHTS ROOM AND TAX",
|
28
|
+
"ALL RESERVATIONS MUST BE CANCELLED 24 HOURS PRIOR TO HOST TIME UNLESS DEPOSIT REQUIRED IF THIS RESERVATION HAS BEEN MADE ELECTRONICALLY PLEASE CANCEL IT ELECTRONICALLY TO AVOID CONFUSION AND A NO SHOW BILL POLICY SUBJECT TO CHANGE .",
|
29
|
+
"PLEASE REFER TO RATE DETAILS AT THE TIME OF BOOKING FOR CANCELLATION POLICY. PLEASE NOTE THAT POLICY IS SUBJECT TO CHANGES WITHOUT NOTICE."
|
30
|
+
]
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module TeRex
|
2
|
+
module Train
|
3
|
+
UNKNOWN = [
|
4
|
+
"The cancellation policy will be determined when the rate is validated.",
|
5
|
+
"-CANCEL POLICY MAY VARY BY DAY OF WEEK AND SEASON. THE MOST ACCURATE CANCEL POLICY IS ADVISED DURING BOOKING PROCESS. IN CASE OF A NO-SHOW THE CREDIT CARD WILL BE CHARGED ONE NIGHT STAY. OUR SYSTEM ACKNOWLEDGES ALL PROPERLY CANCELED RESERVATIONS BY RETURNING A CANCELLATION NUMBER. DO NOT ASSUME YOUR RESERVATION IS CANCELED IF YOU HAVE NOT RECEIVED A CANCELLATION NUMBER IN YOUR PNR OR BOOKING FILE. IF YOU DO NOT RECEIVE A CANCELLATION NUMBER, PLEASE CALL THE CHOICE GDS DEPARTMENT AT 1-866-953-4570",
|
6
|
+
"-14JAN02 - END - CANCEL POLICIES VARY BY HOTEL. SINCE A HOTEL CAN SET A CANCELLATION POLICY OF UP TO 30 DAYS IN ADVANCE, PLEASE REVIEW POLICY PRIOR TO BOOKING TO AVOID POSSIBLE CHARGE."
|
7
|
+
]
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require_relative "../lib/te_rex"
|
2
|
+
class TrainedBayesTest < MicroTest::Test
|
3
|
+
|
4
|
+
#Dir["#{File.dirname(__FILE__)}/test_modules/**/*.rb"].each { |f| load(f) if !!(f =~ /^[^\.].+\.rb/)}
|
5
|
+
|
6
|
+
@@refund = TeRex::Train::REFUND
|
7
|
+
@@partrefund = TeRex::Train::PARTREFUND
|
8
|
+
@@norefund = TeRex::Train::NONREFUND
|
9
|
+
@@unknown = TeRex::Train::UNKNOWN
|
10
|
+
|
11
|
+
@@cls = TeRex::Classifier::Bayes.new("Refund", "Partrefund", "Nonrefund", "Unknown")
|
12
|
+
@@refund.each {|txt| @@cls.train("Refund", txt) }
|
13
|
+
@@partrefund.each {|txt| @@cls.train("Partrefund", txt) }
|
14
|
+
@@norefund.each {|txt| @@cls.train("Nonrefund", txt) }
|
15
|
+
@@unknown.each {|txt| @@cls.train("Unknown", txt) }
|
16
|
+
|
17
|
+
test "Training Data Set Test: Random exact match sould classify correctly (but we are lenient on partrefund/refund)" do
|
18
|
+
|
19
|
+
s_refund = @@refund.sample
|
20
|
+
s_partial = @@partrefund.sample
|
21
|
+
s_non = @@norefund.sample
|
22
|
+
s_unk = @@unknown.sample
|
23
|
+
|
24
|
+
s_refund1 = @@cls.classify(s_refund)
|
25
|
+
s_partial1 = @@cls.classify(s_partial)
|
26
|
+
s_non1 = @@cls.classify(s_non)
|
27
|
+
s_unk1= @@cls.classify(s_unk)
|
28
|
+
|
29
|
+
# We are lenient here: either Refund or Partrefund is accepted because the two classes are not very distinct.
|
30
|
+
assert(["Refund", "Partrefund"].include?(s_refund1)) # accept either label; the former `x == "Refund" || "Partrefund"` was always truthy and could never fail
|
31
|
+
# We are lenient on Refund || Partrefund because of the non-distinctness of the two.
|
32
|
+
assert(["Partrefund", "Refund"].include?(s_partial1)) # accept either label; the former `x == "Partrefund" || "Refund"` was always truthy and could never fail
|
33
|
+
assert s_non1 == "Nonrefund"
|
34
|
+
assert s_unk1 == "Unknown"
|
35
|
+
|
36
|
+
# We are lenient on Partrefund || Refund but we still want to see when it fails
|
37
|
+
assert s_refund1 != "Partrefund"
|
38
|
+
# We are lenient on Refund || Partrefund but we still want to see when it fails
|
39
|
+
assert s_partial1 != "Refund"
|
40
|
+
assert s_non1 != "Unknown"
|
41
|
+
assert s_unk1 != "Nonrefund"
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
test "Training Data Set Test: Non-canonical examples should classify correctly" do
|
46
|
+
|
47
|
+
refund_s1 = "You will get a full refund and free cancellation"
|
48
|
+
partrefund_s1 = "You will get a refund if you cancel or change your reservation before 0201 AM on 01/31/14"
|
49
|
+
norefund_s1 = "You will get a non-refund"
|
50
|
+
unk_s1 = "You will get a nonsense am I writing here."
|
51
|
+
|
52
|
+
refund_s11 = @@cls.classify(refund_s1)
|
53
|
+
partrefund_s11 = @@cls.classify(partrefund_s1)
|
54
|
+
norefund_s11 = @@cls.classify(norefund_s1)
|
55
|
+
unk_s11 = @@cls.classify(unk_s1)
|
56
|
+
|
57
|
+
assert refund_s11 == "Refund"
|
58
|
+
assert partrefund_s11 == "Partrefund"
|
59
|
+
assert norefund_s11 == "Nonrefund"
|
60
|
+
assert unk_s11 == "Unknown"
|
61
|
+
end
|
62
|
+
|
63
|
+
test "Training Data Set Test: Micro examples should return correct classification" do
|
64
|
+
|
65
|
+
s1 = "free cancellation"
|
66
|
+
s2 = "If you cancel or change your reservation before"
|
67
|
+
s3 = "non-refund"
|
68
|
+
s4 = "policy rate validated."
|
69
|
+
|
70
|
+
s11 = @@cls.classify(s1)
|
71
|
+
s22 = @@cls.classify(s2)
|
72
|
+
s33 = @@cls.classify(s3)
|
73
|
+
s44 = @@cls.classify(s4)
|
74
|
+
|
75
|
+
assert s11 == "Refund"
|
76
|
+
assert s22 == "Partrefund"
|
77
|
+
assert s33 == "Nonrefund"
|
78
|
+
assert s44 == "Unknown"
|
79
|
+
|
80
|
+
assert s11 != "Partrefund"
|
81
|
+
assert s22 != "Refund"
|
82
|
+
assert s33 != "Unknown"
|
83
|
+
assert s44 != "Nonrefund"
|
84
|
+
end
|
85
|
+
|
86
|
+
test "Training Data Set Test: Micro examples should NOT match fake classes" do
|
87
|
+
|
88
|
+
s1 = "free cancellation"
|
89
|
+
s2 = "partial refund"
|
90
|
+
s3 = "no refund"
|
91
|
+
s4 = "policy rate validated."
|
92
|
+
|
93
|
+
s11 = @@cls.classify(s1)
|
94
|
+
s22 = @@cls.classify(s2)
|
95
|
+
s33 = @@cls.classify(s3)
|
96
|
+
s44 = @@cls.classify(s4)
|
97
|
+
|
98
|
+
assert s11 != "Computers"
|
99
|
+
assert s22 != "Science"
|
100
|
+
assert s33 != "Entertainment"
|
101
|
+
assert s44 != "Sports"
|
102
|
+
end
|
103
|
+
|
104
|
+
test "Training Data Set Test: Ambiguous examples should return 'Unknown'" do
|
105
|
+
|
106
|
+
s1 = "gobbly goop droop blithely toadwakle Grimpleshtein uf Varendorrf vun muscilaty"
|
107
|
+
s2 = "The United States announced on Tuesday it will send 3,000 troops to help tackle the Ebola outbreak as part of a ramped-up plan, including a major deployment in Liberia."
|
108
|
+
s3 = "United Parcel Service Inc is almost doubling the number of seasonal employees it hires for this year's holiday shopping season as it aims to avoid a repeat of last year's network breakdown."
|
109
|
+
s4 = "Alberto Contador wrapped up his third Vuelta a España triumph when he comfortably held on to his overall lead in the 21st and final stage time trial in a rain-soaked Santiago de Compostela on Sunday."
|
110
|
+
|
111
|
+
s11 = @@cls.classify(s1)
|
112
|
+
s22 = @@cls.classify(s2)
|
113
|
+
s33 = @@cls.classify(s3)
|
114
|
+
s44 = @@cls.classify(s4)
|
115
|
+
|
116
|
+
assert s11 == "Unknown"
|
117
|
+
assert s22 == "Unknown"
|
118
|
+
assert s33 == "Unknown"
|
119
|
+
assert s44 == "Unknown"
|
120
|
+
end
|
121
|
+
|
122
|
+
test "Training Data Set Test: Category counts are equivalent with number of training data per class" do
|
123
|
+
|
124
|
+
assert @@cls.category_counts[:Refund] == @@refund.count
|
125
|
+
assert @@cls.category_counts[:Partrefund] == @@partrefund.count
|
126
|
+
assert @@cls.category_counts[:Nonrefund] == @@norefund.count
|
127
|
+
assert @@cls.category_counts[:Unknown] == @@unknown.count
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
test "Sparse Data Set Test: Training categories should NOT be undertrained... except 'Unknown'" do
|
132
|
+
info = @@cls.training_description
|
133
|
+
puts "\nUndertraining data for SPARSE DATA SET: #{info}"
|
134
|
+
res = @@cls.under_trained?
|
135
|
+
assert res[0].include? :Unknown
|
136
|
+
end
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
|
metadata
ADDED
@@ -0,0 +1,231 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: te_rex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Joshua Bowles
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: fast-stemmer
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.0.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.0'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.0.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: bundler
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.5'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.5.3
|
43
|
+
type: :development
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.5'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.5.3
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rake
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '10.3'
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 10.3.2
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '10.3'
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 10.3.2
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: micro_test
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0.4'
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.4.4
|
83
|
+
type: :development
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.4'
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 0.4.4
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: pry
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0.10'
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 0.10.1
|
103
|
+
type: :development
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.10'
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 0.10.1
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: pry-debugger
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0.2'
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 0.2.3
|
123
|
+
type: :development
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0.2'
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 0.2.3
|
133
|
+
- !ruby/object:Gem::Dependency
|
134
|
+
name: pry-rescue
|
135
|
+
requirement: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - "~>"
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '1.4'
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: 1.4.1
|
143
|
+
type: :development
|
144
|
+
prerelease: false
|
145
|
+
version_requirements: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - "~>"
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '1.4'
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 1.4.1
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: pry-stack_explorer
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.4'
|
160
|
+
- - ">="
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
version: 0.4.9.1
|
163
|
+
type: :development
|
164
|
+
prerelease: false
|
165
|
+
version_requirements: !ruby/object:Gem::Requirement
|
166
|
+
requirements:
|
167
|
+
- - "~>"
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: '0.4'
|
170
|
+
- - ">="
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: 0.4.9.1
|
173
|
+
description: Simple text processing for small data sets.
|
174
|
+
email:
|
175
|
+
- jbowayles@gmail.com
|
176
|
+
executables: []
|
177
|
+
extensions: []
|
178
|
+
extra_rdoc_files: []
|
179
|
+
files:
|
180
|
+
- lib/format/corpus/basic_file.rb
|
181
|
+
- lib/format/corpus/brown_file.rb
|
182
|
+
- lib/format/format.rb
|
183
|
+
- lib/te_rex.rb
|
184
|
+
- lib/te_rex/alpha_num.rb
|
185
|
+
- lib/te_rex/bayes.rb
|
186
|
+
- lib/te_rex/bayes_data.rb
|
187
|
+
- lib/te_rex/corpus.rb
|
188
|
+
- lib/te_rex/stop_word.rb
|
189
|
+
- lib/te_rex/version.rb
|
190
|
+
- test/alpha_num_test.rb
|
191
|
+
- test/bayes_data_test.rb
|
192
|
+
- test/sparse_bayes_test.rb
|
193
|
+
- test/test_modules/nonrefund.rb
|
194
|
+
- test/test_modules/partrefund.rb
|
195
|
+
- test/test_modules/refund.rb
|
196
|
+
- test/test_modules/unknown.rb
|
197
|
+
- test/trained_bayes_test.rb
|
198
|
+
homepage: ''
|
199
|
+
licenses:
|
200
|
+
- MIT
|
201
|
+
metadata: {}
|
202
|
+
post_install_message:
|
203
|
+
rdoc_options: []
|
204
|
+
require_paths:
|
205
|
+
- lib
|
206
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
207
|
+
requirements:
|
208
|
+
- - ">="
|
209
|
+
- !ruby/object:Gem::Version
|
210
|
+
version: '0'
|
211
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
requirements: []
|
217
|
+
rubyforge_project:
|
218
|
+
rubygems_version: 2.4.1
|
219
|
+
signing_key:
|
220
|
+
specification_version: 4
|
221
|
+
summary: Basic NLP stuff for small data sets. Naive Bayes classification and corpora
|
222
|
+
loading.
|
223
|
+
test_files:
|
224
|
+
- test/alpha_num_test.rb
|
225
|
+
- test/bayes_data_test.rb
|
226
|
+
- test/sparse_bayes_test.rb
|
227
|
+
- test/test_modules/nonrefund.rb
|
228
|
+
- test/test_modules/partrefund.rb
|
229
|
+
- test/test_modules/refund.rb
|
230
|
+
- test/test_modules/unknown.rb
|
231
|
+
- test/trained_bayes_test.rb
|