chat_correct 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +208 -0
- data/Rakefile +4 -0
- data/chat_correct.gemspec +28 -0
- data/lib/chat_correct/capitalization.rb +13 -0
- data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
- data/lib/chat_correct/common_verb_mistake.rb +62 -0
- data/lib/chat_correct/contraction.rb +103 -0
- data/lib/chat_correct/correct.rb +352 -0
- data/lib/chat_correct/corrections_hash.rb +204 -0
- data/lib/chat_correct/mistake_analyzer.rb +40 -0
- data/lib/chat_correct/pluralization.rb +22 -0
- data/lib/chat_correct/possessive.rb +25 -0
- data/lib/chat_correct/punctuation.rb +17 -0
- data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
- data/lib/chat_correct/spelling.rb +20 -0
- data/lib/chat_correct/time.rb +14 -0
- data/lib/chat_correct/tokenize.rb +164 -0
- data/lib/chat_correct/verb.rb +65 -0
- data/lib/chat_correct/version.rb +3 -0
- data/lib/chat_correct.rb +16 -0
- data/spec/chat_correct/capitalization_spec.rb +17 -0
- data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
- data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
- data/spec/chat_correct/contraction_spec.rb +259 -0
- data/spec/chat_correct/correct_spec.rb +1650 -0
- data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
- data/spec/chat_correct/pluralization_spec.rb +31 -0
- data/spec/chat_correct/possessive_spec.rb +31 -0
- data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
- data/spec/chat_correct/punctuation_spec.rb +21 -0
- data/spec/chat_correct/spelling_spec.rb +59 -0
- data/spec/chat_correct/time_spec.rb +21 -0
- data/spec/chat_correct/tokenize_spec.rb +142 -0
- data/spec/chat_correct/verb_spec.rb +60 -0
- data/spec/spec_helper.rb +1 -0
- metadata +201 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module ChatCorrect
  # Decides whether two tokens differ only by the 'ƪ' marker character,
  # i.e. what looks like a spelling difference is really a punctuation one.
  # NOTE(review): 'ƪ' appears to be the placeholder the tokenizer substitutes
  # for apostrophes inside contractions -- confirm against Tokenize.
  class PunctuationMasqueradingAsSpellingError
    # Marker character that is ignored when comparing the two tokens.
    MARKER = 'ƪ'.freeze

    attr_reader :token_a, :token_b

    # @param token_a [String, nil] first token of the pair
    # @param token_b [String, nil] second token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when at least one token contains the marker and
    #   both tokens are identical once every marker is removed; false
    #   otherwise, including when either token is nil.
    def exists?
      # A missing token cannot masquerade as anything; answer false instead
      # of raising NoMethodError on nil.
      return false if token_a.nil? || token_b.nil?
      return false unless token_a.include?(MARKER) || token_b.include?(MARKER)

      token_a.delete(MARKER).eql?(token_b.delete(MARKER))
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'levenshtein'
|
2
|
+
|
3
|
+
module ChatCorrect
  # Heuristic check for whether two tokens are spelling variants of each
  # other, based on their case-insensitive Levenshtein distance.
  class Spelling
    # Short function words: swapping one for another is treated as a word
    # choice problem rather than a misspelling.
    WORD_CHOICE = ["the", "that", "this", "on", "at", "in", "an", "it", "if", "of", "to"].freeze

    attr_reader :token_a, :token_b

    # @param token_a [String] first token of the pair
    # @param token_b [String] second token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when all of the following hold:
    #   * both tokens are longer than one character,
    #   * neither token consists solely of punctuation,
    #   * the tokens differ when compared case-insensitively,
    #   * they are not both members of WORD_CHOICE,
    #   * their case-insensitive edit distance is below 3, and
    #   * if their first characters differ (case-sensitively), the edit
    #     distance is at most 1.
    def spelling_error?
      return false unless token_a.length > 1 && token_b.length > 1
      return false if punctuation_only?(token_a) || punctuation_only?(token_b)

      lower_a = token_a.downcase
      lower_b = token_b.downcase
      return false if lower_a == lower_b
      return false if WORD_CHOICE.include?(lower_a) && WORD_CHOICE.include?(lower_b)

      # Computed once; both remaining rules depend on the same distance.
      distance = Levenshtein.distance(lower_a, lower_b)
      return false if token_a[0] != token_b[0] && distance > 1

      distance < 3
    end

    private

    # True when nothing is left after stripping punctuation characters.
    def punctuation_only?(token)
      token.gsub(/[[:punct:]]/, "") == ""
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ChatCorrect
  # Checks whether a piece of text has a digit on both sides of its first
  # colon (as in "10:30").
  class Time
    attr_reader :text

    # @param text [#to_s] the text to inspect
    def initialize(text:)
      @text = text
    end

    # @return [Boolean] true when the text contains a ':' and the character
    #   immediately before and the character immediately after the first ':'
    #   are both digits; false otherwise (including nil or colon-free input).
    def is_time?
      # Coerce once so nil / non-String input is handled the same way on
      # every step instead of raising before the first to_s.
      before, colon, after = text.to_s.partition(':')
      return false if colon.empty?

      digit?(before[-1]) && digit?(after[0])
    end

    private

    # True when char is a single digit; nil (nothing on that side of the
    # colon) counts as not a digit.
    def digit?(char)
      !char.nil? && !(char =~ /\A\d\z/).nil?
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
module ChatCorrect
  # Splits text into word and punctuation tokens. Quotes, contraction
  # apostrophes and digit separators are first rewritten to placeholder
  # characters (∫ ∬ ∯ ∮ ƪ ☌ ☊) so that the whitespace split keeps them apart
  # from, or attached to, the right neighbours.
  class Tokenize
    # Lower-cased abbreviations whose trailing period is not a full stop.
    ABBREVIATIONS = ['adj', 'adm', 'adv', 'al', 'ala', 'alta', 'apr', 'arc', 'ariz', 'ark', 'art', 'assn', 'asst', 'attys', 'aug', 'ave', 'bart', 'bld', 'bldg', 'blvd', 'brig', 'bros', 'cal', 'calif', 'capt', 'cl', 'cmdr', 'co', 'col', 'colo', 'comdr', 'con', 'conn', 'corp', 'cpl', 'cres', 'ct', 'd.phil', 'dak', 'dec', 'del', 'dept', 'det', 'dist', 'dr', 'dr.phil', 'dr.philos', 'drs', 'e.g', 'ens', 'esp', 'esq', 'etc', 'exp', 'expy', 'ext', 'feb', 'fed', 'fla', 'ft', 'fwy', 'fy', 'ga', 'gen', 'gov', 'hon', 'hosp', 'hr', 'hway', 'hwy', 'i.e', 'ia', 'id', 'ida', 'ill', 'inc', 'ind', 'ing', 'insp', 'is', 'jan', 'jr', 'jul', 'jun', 'kan', 'kans', 'ken', 'ky', 'la', 'lt', 'ltd', 'maj', 'man', 'mar', 'mass', 'may', 'md', 'me', 'messrs', 'mex', 'mfg', 'mich', 'min', 'minn', 'miss', 'mlle', 'mm', 'mme', 'mo', 'mont', 'mr', 'mrs', 'ms', 'msgr', 'mssrs', 'mt', 'mtn', 'neb', 'nebr', 'nev', 'no', 'nos', 'nov', 'nr', 'oct', 'ok', 'okla', 'ont', 'op', 'ord', 'ore', 'p', 'pa', 'pd', 'pde', 'penn', 'penna', 'pfc', 'ph', 'ph.d', 'pl', 'plz', 'pp', 'prof', 'pvt', 'que', 'rd', 'ref', 'rep', 'reps', 'res', 'rev', 'rt', 'sask', 'sen', 'sens', 'sep', 'sept', 'sfc', 'sgt', 'sr', 'st', 'supt', 'surg', 'tce', 'tenn', 'tex', 'univ', 'usafa', 'u.s', 'ut', 'va', 'v', 'ver', 'vs', 'vt', 'wash', 'wis', 'wisc', 'wy', 'wyo', 'yuk'].freeze

    # Tokens removed by #tokenize_no_punct.
    # NOTE(review): "\uFF0E", "\uFF01" and "\uFF1F" (full-width . ! ?) take
    # the place of entries that were exact duplicates of the ASCII marks;
    # the duplicates are assumed to be width-normalised full-width forms --
    # confirm against the upstream file.
    PUNCTUATION = ['。', "\uFF0E", '.', '!', "\uFF01", '?', "\uFF1F", '、', '¡', '¿', '„', '“', '[', ']', '"', '#', '$', '%', '&', '(', ')', '*', '+', ',', ':', ';', '<', '=', '>', '@', '^', '_', '`', "'", '{', '|', '}', '~', '-'].freeze

    # Sentence-ending marks split off the end of a token by
    # #separate_other_ending_punc. ASCII '.' is deliberately absent: it is
    # handled by #separate_full_stop, which protects abbreviations.
    # NOTE(review): same full-width assumption as PUNCTUATION.
    ENDING_PUNCTUATION = ['。', "\uFF0E", '!', "\uFF01", '?', "\uFF1F"].freeze

    attr_reader :text

    # @param text [String, nil] the text to tokenize
    def initialize(text:)
      @text = text
    end

    # @return [Array<String>, nil] the tokens, or nil when text is nil. Text
    #   made up solely of word characters is returned as a single token.
    def tokenize
      return if text.nil?
      return [text] if /\A\w+\z/ =~ text

      prepared = convert_quotes(text)
      prepared = shift_all_punct(prepared)
      prepared = convert_contractions(prepared)
      prepared = convert_numbers_with_commas(prepared)
      prepared = convert_numbers_with_periods(prepared)
      pieces = separate_other_ending_punc(separate_full_stop(prepared.split(' ')))
      pieces.map { |piece| piece.tr("\n", '').tr("\r", '').strip }
    end

    # @return [Array<String>, nil] the tokens with every entry of PUNCTUATION
    #   removed, or nil when text is nil.
    def tokenize_no_punct
      tokens = tokenize # tokenize once; the result doubles as the nil check
      return if tokens.nil?

      tokens - PUNCTUATION
    end

    private

    # Runs every punctuation-spacing pass in a fixed order.
    def shift_all_punct(txt)
      shifted = shift_multiple_dash(txt)
      shifted = shift_comma(shifted)
      shifted = shift_ellipse(shifted)
      shifted = shift_bracket(shifted)
      shifted = shift_other_punct(shifted)
      shifted = shift_upsidedown_question_mark(shifted)
      shifted = shift_upsidedown_exclamation(shifted)
      shift_special_quotes(shifted)
    end

    # Rewrites quote characters to placeholders. Each pass only sees what the
    # previous pass left behind, so the order of the gsub calls matters.
    def convert_quotes(txt)
      txt.gsub(/`(?!`)(?=.*\w)/, ' ∫ ')
         .gsub(/"(?=.*\w)/, ' ∬ ')
         .gsub(/(\W|^)'(?=.*\w)(?!twas)(?!Twas)/) { "#{Regexp.last_match(1)} ∫ " }
         .gsub(/(\W|^)'(?=.*\w)/, 'ƪ') # leftover: apostrophe directly before twas/Twas
         .gsub(/"/, ' ∯ ')
         .gsub(/(\w|\D)'(?!')(?=\W|$)/) { "#{Regexp.last_match(1)} ∮ " }
         .squeeze(' ').strip
    end

    # Collapses a run of two or more dashes into one space-separated dash.
    def shift_multiple_dash(txt)
      txt.gsub(/--+/, ' - ').squeeze(' ')
    end

    # Puts spaces around commas that are not directly followed by a digit.
    def shift_comma(txt)
      txt.gsub(/,(?!\d)/, ' , ').squeeze(' ')
    end

    def shift_upsidedown_question_mark(txt)
      txt.gsub(/¿/, ' ¿ ')
    end

    def shift_upsidedown_exclamation(txt)
      txt.gsub(/¡/, ' ¡ ')
    end

    # Puts spaces around runs of three or more periods.
    def shift_ellipse(txt)
      txt.gsub(/\.\.\.+/) { |dots| " #{dots} " }.squeeze(' ').strip
    end

    def shift_special_quotes(txt)
      txt.gsub(/«/, ' « ').gsub(/»/, ' » ')
         .gsub(/„/, ' „ ').gsub(/“/, ' “ ')
    end

    # Puts spaces around every bracket, brace and parenthesis.
    def shift_bracket(txt)
      txt.gsub(/[\(\[\{\}\]\)]/) { |bracket| " #{bracket} " }.squeeze(' ').strip
    end

    # Spaces out ! ? % ; | when they are followed by whitespace, after the
    # passes for doubled and mixed ?/! sequences.
    def shift_other_punct(txt)
      shifted = shift_off_double_quotation_mark(txt)
      shifted = shift_off_double_exclamation(shifted)
      shifted = shift_off_double_mixed_1(shifted)
      shifted = shift_off_double_mixed_2(shifted)
      shifted.gsub(/([\!\?\%;|])\s+/) { " #{Regexp.last_match(1)} " }.squeeze(' ').strip
    end

    # Despite the name this handles question marks: when txt contains '??',
    # every '?' followed by whitespace is spaced out.
    def shift_off_double_quotation_mark(txt)
      txt.include?('??') ? txt.gsub(/([\?\?])\s+/) { " #{Regexp.last_match(1)} " } : txt
    end

    # When txt contains '!!', every '!' followed by whitespace is spaced out.
    def shift_off_double_exclamation(txt)
      txt.include?('!!') ? txt.gsub(/([!!])\s+/) { " #{Regexp.last_match(1)} " } : txt
    end

    def shift_off_double_mixed_1(txt)
      txt.include?('?!') ? txt.gsub(/\?\!/) { ' ? ! ' } : txt
    end

    def shift_off_double_mixed_2(txt)
      txt.include?('!?') ? txt.gsub(/\!\?/) { ' ! ? ' } : txt
    end

    # Replaces the apostrophe of 'd 'm 's n't 've 'll 're with 'ƪ' so the
    # contraction stays a single token.
    def convert_contractions(txt)
      txt.gsub(/([A-Za-z])'([dms])\b/) { "#{Regexp.last_match(1)}ƪ#{Regexp.last_match(2)}" }
         .gsub(/n't\b/, 'nƪt')
         .gsub(/'(ve|ll|re)\b/) { "ƪ#{Regexp.last_match(1)}" }
    end

    # Replaces a comma between two digits with '☌'.
    def convert_numbers_with_commas(txt)
      txt.gsub(/(?<=\d),(?=\d)/, '☌')
    end

    # Replaces a period between two digits with '☊'.
    def convert_numbers_with_periods(txt)
      txt.gsub(/(?<=\d)\.(?=\d)/, '☊')
    end

    # Splits a trailing ENDING_PUNCTUATION mark off every multi-character
    # token. Only the first matching mark is used, so a token can never be
    # emitted more than once.
    def separate_other_ending_punc(array)
      array.each_with_object([]) do |token, out|
        mark = ENDING_PUNCTUATION.find { |candidate| token[-1] == candidate } if token.length > 1
        if mark.nil?
          out << token
          next
        end

        pieces = token.split(mark)
        out.concat(pieces)
        out << mark
        # NOTE(review): a token made only of the mark (e.g. "!!") splits to
        # nothing and is then appended whole after the mark, as before;
        # confirm whether that is intended.
        out << token if pieces.empty?
      end
    end

    # Splits a trailing ASCII period off tokens, except for abbreviations,
    # single letters and dotted initials when another token follows. The
    # final token is split whenever a word character precedes its period.
    def separate_full_stop(tokens)
      words = []
      tokens.each_with_index do |token, index|
        stem = token[/\A(.+)\.\z/, 1] if tokens[index + 1]
        if stem && !keeps_period?(stem)
          words << stem << '.'
        else
          words << token
        end
      end
      if words[-1] && words[-1] =~ /\A(.*\w)\.\z/
        words[-1] = Regexp.last_match(1)
        words.push('.')
      end
      words
    end

    # True when the period after stem belongs to the word itself: a known
    # abbreviation, a single letter, or dotted letters such as "u.s".
    def keeps_period?(stem)
      ABBREVIATIONS.include?(stem.downcase) ||
        !(stem =~ /\A[a-z]\z/i).nil? ||
        !(stem =~ /[a-z](?:\.[a-z])+\z/i).nil?
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'verbs'
|
2
|
+
|
3
|
+
module ChatCorrect
  # Decides whether a word tagged as a verb also shows up in a text in one of
  # its conjugated forms. Conjugation is delegated to String#verb, which is
  # expected to be provided by the `verbs` gem required by this file.
  class Verb
    # Forms that are normalised to the infinitive 'be' on construction.
    BE_FORMS = %w[am been are is was were].freeze

    # Conjugation options that are combined exhaustively.
    TENSES = [:past, :present, :future].freeze
    PERSONS = [:first, :second, :third].freeze
    PLURALITIES = [:singular, :plural].freeze
    ASPECTS = [:habitual, :perfect, :perfective, :progressive, :prospective].freeze
    MOODS = [:indicative, :imperative, :subjunctive].freeze

    attr_reader :word, :pos, :text

    # @param word [String] the word under inspection
    # @param pos [String] part-of-speech tag; only its first letter is read
    # @param text [String] the text searched for conjugated forms
    def initialize(word:, pos:, text:)
      @word = BE_FORMS.include?(word) ? 'be' : word
      @pos = pos
      @text = text
    end

    # @return [Boolean] true when the word is not 'a', 'an' or 'to', is not
    #   made up solely of punctuation, contains no 'ƪ', its tag starts with
    #   'v' (case-insensitive) and some conjugation of it occurs in text.
    def verb_error?
      return false if word.eql?('a') || word.eql?('an') || word.eql?('to')
      return false if word.gsub(/[[:punct:]]/, '').eql?('') || word.include?('ƪ')
      return false unless pos.downcase[0].eql?('v')

      check_conjugated_word(word)
    end

    private

    # Crude suffix stripping: "...ied" -> "...y", "...ed" -> "...",
    # "...s" -> "..."; anything else is returned unchanged.
    def get_verb_infinitive(word)
      if word.end_with?('ed')
        word[-3] == 'i' ? "#{word[0..-4]}y" : word[0..-3]
      elsif word.end_with?('s')
        word[0..-2]
      else
        word
      end
    end

    # Tries every tense/person/plurality/aspect/mood combination (imperative
    # only for present tense, second person) and reports whether any
    # non-empty conjugated form occurs in text.
    def check_conjugated_word(word)
      infinitive = get_verb_infinitive(word) # loop-invariant, compute once
      TENSES.product(PERSONS, PLURALITIES, ASPECTS, MOODS).any? do |tense, person, plurality, aspect, mood|
        next false if mood == :imperative && !(tense == :present && person == :second)

        conjugated = infinitive.verb.conjugate(tense: tense, person: person, plurality: plurality,
                                               aspect: aspect, mood: mood)
        # Literal substring search: the conjugated form is data, not a
        # pattern, so regex metacharacters in it must not be interpreted.
        !conjugated.empty? && text.include?(conjugated)
      end
    end
  end
end
|
data/lib/chat_correct.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "chat_correct/version"
|
2
|
+
require "chat_correct/correct"
|
3
|
+
require "chat_correct/contraction"
|
4
|
+
require "chat_correct/punctuation_masquerading_as_spelling_error"
|
5
|
+
require "chat_correct/possessive"
|
6
|
+
require "chat_correct/common_verb_mistake"
|
7
|
+
require "chat_correct/spelling"
|
8
|
+
require "chat_correct/capitalization"
|
9
|
+
require "chat_correct/pluralization"
|
10
|
+
require "chat_correct/verb"
|
11
|
+
require "chat_correct/combine_multi_word_verbs"
|
12
|
+
require "chat_correct/tokenize"
|
13
|
+
require "chat_correct/time"
|
14
|
+
require "chat_correct/punctuation"
|
15
|
+
require "chat_correct/corrections_hash"
|
16
|
+
require "chat_correct/mistake_analyzer"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Capitalization do
  # Builds the checker for one token pair and returns its verdict.
  def capitalization_verdict(first, second)
    described_class.new(token_a: first, token_b: second).capitalization_error?
  end

  it 'returns true if a capitalization error is found' do
    # Same word, different case on the first letter.
    expect(capitalization_verdict('Hello', 'hello')).to eq(true)
  end

  it 'returns false if a capitalization error is not found' do
    # Identical tokens.
    expect(capitalization_verdict('Hello', 'Hello')).to eq(false)
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::CombineMultiWordVerbs do
  # Runs the combiner on a sentence and returns the resulting token array.
  def combined_tokens(sentence)
    described_class.new(text: sentence).combine
  end

  # Modal + auxiliary + participle.
  it 'returns an array' do
    expect(combined_tokens('I would have gone to the store.'))
      .to eq(["I", "would have gone", "to", "the", "store", "."])
  end

  # Simple future.
  it 'returns an array' do
    expect(combined_tokens('I will go to the store.'))
      .to eq(["I", "will go", "to", "the", "store", "."])
  end

  # Contraction plus an ungrammatical "should had".
  it 'returns an array' do
    expect(combined_tokens("He didn't realize that he should had changed the locks."))
      .to eq(["He", "didnƪt realize", "that", "he", "should", "had changed", "the", "locks", "."])
  end

  # Contraction plus "should have".
  it 'returns an array' do
    expect(combined_tokens("He hadn't realized that he should have changed the locks."))
      .to eq(["He", "hadnƪt realized", "that", "he", "should", "have changed", "the", "locks", "."])
  end

  # Negated progressive, input already space-separated before the period.
  it 'returns an array' do
    expect(combined_tokens("I was not going to the party ."))
      .to eq(["I", "was not going", "to", "the", "party", "."])
  end

  # Negated simple past, input already space-separated before the period.
  it 'returns an array' do
    expect(combined_tokens("I did not go to the party ."))
      .to eq(["I", "did not go", "to", "the", "party", "."])
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::CommonVerbMistake do
  # Builds the checker for one token pair and returns its verdict.
  def mistake_verdict(first, second)
    described_class.new(token_a: first, token_b: second).exists?
  end

  it 'returns true if a common verb mistake is found #001' do
    expect(mistake_verdict('freezed', 'froze')).to eq(true)
  end

  # Same pair as #001 with the arguments swapped.
  it 'returns true if a common verb mistake is found #002' do
    expect(mistake_verdict('froze', 'freezed')).to eq(true)
  end

  # Title corrected: this example asserts the negative case.
  it 'returns false if a common verb mistake is not found #003' do
    expect(mistake_verdict('cooked', 'cooks')).to eq(false)
  end
end
|
@@ -0,0 +1,259 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Contraction do
  # Builds the checker for the expanded tokens and the candidate contraction
  # and returns its verdict.
  def contraction_verdict(first, second, candidate)
    described_class.new(token_a: first, token_b: second, contraction: candidate).contraction?
  end

  it 'handles nil values' do
    expect(contraction_verdict(nil, nil, nil)).to eq(false)
  end

  context 'contractions including not' do
    # Apostrophe already replaced by the 'ƪ' marker.
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('am', 'not', 'ainƪt')).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('am', 'not', "ain't")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('is', 'not', "isn't")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('should', 'not', "shouldn't")).to eq(true)
    end

    # Title corrected: "is't" is not a contraction of "is not".
    it 'returns false if it is not a contraction' do
      expect(contraction_verdict('is', 'not', "is't")).to eq(false)
    end
  end

  context 'contractions including us' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('let', 'us', "let's")).to eq(true)
    end
  end

  context 'contractions including am' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('I', 'am', "I'm")).to eq(true)
    end
  end

  context 'contractions including are' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('You', 'are', "You're")).to eq(true)
    end

    # Title corrected: "you'rre" is not a contraction of "You are".
    it 'returns false if it is not a contraction' do
      expect(contraction_verdict('You', 'are', "you'rre")).to eq(false)
    end
  end

  context 'contractions including is, does, has' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'is', "she's")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'does', "she's")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'has', "she's")).to eq(true)
    end

    it 'returns false if it is not a contraction' do
      expect(contraction_verdict('she', 'has', "she'ss")).to eq(false)
    end
  end

  context 'contractions including have' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('You', 'have', "You've")).to eq(true)
    end
  end

  context 'contractions including had, did, would' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'had', "she'd")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'did', "she'd")).to eq(true)
    end

    it 'returns true if it is a contraction' do
      expect(contraction_verdict('she', 'would', "she'd")).to eq(true)
    end
  end

  context 'contractions including will' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('You', 'will', "You'll")).to eq(true)
    end
  end

  context 'contractions including of' do
    it 'returns true if it is a contraction #001' do
      expect(contraction_verdict('of', 'monkeys', "o' monkeys")).to eq(true)
    end

    it 'returns true if it is a contraction #002' do
      expect(contraction_verdict('of', nil, "o'")).to eq(true)
    end
  end

  context 'contractions including it' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('It', 'was', "'Twas")).to eq(true)
    end
  end

  context 'contractions including them' do
    it 'returns true if it is a contraction' do
      expect(contraction_verdict('leave', 'them', "leave 'em")).to eq(true)
    end
  end

  context 'irregular contractions' do
    it 'returns true if it is a contraction #001' do
      expect(contraction_verdict('is', 'not', "ain't")).to eq(true)
    end

    it 'returns true if it is a contraction #002' do
      expect(contraction_verdict('madam', nil, "ma'am")).to eq(true)
    end

    it 'returns true if it is a contraction #003' do
      expect(contraction_verdict('never-do-well', nil, "ne'er-do-well")).to eq(true)
    end

    it 'returns true if it is a contraction #004' do
      expect(contraction_verdict('cat-of-nine-tails', nil, "cat-o'-nine-tails")).to eq(true)
    end

    it 'returns true if it is a contraction #005' do
      expect(contraction_verdict('jack-of-the-lantern', nil, "jack-o'-lantern")).to eq(true)
    end

    it 'returns true if it is a contraction #006' do
      expect(contraction_verdict('will-of-the-wisp', nil, "will-o'-the-wisp")).to eq(true)
    end
  end
end
|