chat_correct 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +208 -0
- data/Rakefile +4 -0
- data/chat_correct.gemspec +28 -0
- data/lib/chat_correct/capitalization.rb +13 -0
- data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
- data/lib/chat_correct/common_verb_mistake.rb +62 -0
- data/lib/chat_correct/contraction.rb +103 -0
- data/lib/chat_correct/correct.rb +352 -0
- data/lib/chat_correct/corrections_hash.rb +204 -0
- data/lib/chat_correct/mistake_analyzer.rb +40 -0
- data/lib/chat_correct/pluralization.rb +22 -0
- data/lib/chat_correct/possessive.rb +25 -0
- data/lib/chat_correct/punctuation.rb +17 -0
- data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
- data/lib/chat_correct/spelling.rb +20 -0
- data/lib/chat_correct/time.rb +14 -0
- data/lib/chat_correct/tokenize.rb +164 -0
- data/lib/chat_correct/verb.rb +65 -0
- data/lib/chat_correct/version.rb +3 -0
- data/lib/chat_correct.rb +16 -0
- data/spec/chat_correct/capitalization_spec.rb +17 -0
- data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
- data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
- data/spec/chat_correct/contraction_spec.rb +259 -0
- data/spec/chat_correct/correct_spec.rb +1650 -0
- data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
- data/spec/chat_correct/pluralization_spec.rb +31 -0
- data/spec/chat_correct/possessive_spec.rb +31 -0
- data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
- data/spec/chat_correct/punctuation_spec.rb +21 -0
- data/spec/chat_correct/spelling_spec.rb +59 -0
- data/spec/chat_correct/time_spec.rb +21 -0
- data/spec/chat_correct/tokenize_spec.rb +142 -0
- data/spec/chat_correct/verb_spec.rb +60 -0
- data/spec/spec_helper.rb +1 -0
- metadata +201 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module ChatCorrect
  # Detects a token pair whose only difference is the 'ƪ' placeholder
  # character, i.e. a difference that would otherwise look like a
  # spelling change.
  class PunctuationMasqueradingAsSpellingError
    # Placeholder character that is ignored when the two tokens are compared.
    PLACEHOLDER = 'ƪ'.freeze

    attr_reader :token_a, :token_b

    # @param token_a [String, nil] first token of the pair
    # @param token_b [String, nil] second token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when at least one token contains the placeholder
    #   and both tokens are identical once every placeholder is removed;
    #   false otherwise, including when either token is nil.
    def exists?
      # A missing token cannot match anything; answer false instead of
      # raising NoMethodError on nil.
      return false if token_a.nil? || token_b.nil?
      return false unless [token_a, token_b].any? { |token| token.include?(PLACEHOLDER) }

      token_a.delete(PLACEHOLDER).eql?(token_b.delete(PLACEHOLDER))
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'levenshtein'
|
2
|
+
|
3
|
+
module ChatCorrect
  # Decides whether two tokens differ by what looks like a spelling mistake,
  # based on their case-insensitive Levenshtein distance.
  class Spelling
    # Short function words: a pair drawn entirely from this list is never
    # reported as a spelling error.
    WORD_CHOICE = ["the", "that", "this", "on", "at", "in", "an", "it", "if", "of", "to"].freeze

    attr_reader :token_a, :token_b

    # @param token_a [String] first token of the pair
    # @param token_b [String] second token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when all of the following hold:
    #   * both tokens are longer than one character;
    #   * neither token consists solely of punctuation;
    #   * the tokens differ when compared case-insensitively;
    #   * they are not both members of WORD_CHOICE;
    #   * their case-insensitive edit distance is below 3, and is at most 1
    #     when their (case-sensitive) first characters differ.
    def spelling_error?
      return false unless token_a.length > 1 && token_b.length > 1
      return false if punctuation_only?(token_a) || punctuation_only?(token_b)

      lower_a = token_a.downcase
      lower_b = token_b.downcase
      return false if lower_a == lower_b
      return false if WORD_CHOICE.include?(lower_a) && WORD_CHOICE.include?(lower_b)

      # Computed once; the original evaluated the same distance twice.
      distance = Levenshtein.distance(lower_a, lower_b)
      # NOTE(review): the first-character comparison is case-sensitive while
      # the distance is not ("Teh"/"the" is rejected, "teh"/"the" accepted).
      # Kept as-is; confirm whether that asymmetry is intended.
      return false if token_a[0] != token_b[0] && distance > 1

      distance < 3
    end

    private

    # True when nothing is left of +token+ after removing punctuation.
    def punctuation_only?(token)
      token.gsub(/[[:punct:]]/, "") == ""
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ChatCorrect
  # Checks whether a piece of text looks like a clock time such as "4:30".
  #
  # Note: inside the ChatCorrect namespace the bare constant +Time+ resolves
  # to this class, not to Ruby's core ::Time.
  class Time
    attr_reader :text

    # @param text [#to_s, nil] the text to inspect
    def initialize(text:)
      @text = text
    end

    # Looks only at the first ':' in the text.
    #
    # @return [Boolean] true when the character immediately before and the
    #   character immediately after the first ':' are both digits; false
    #   otherwise, including when there is no ':' or +text+ is nil.
    def is_time?
      # Coerce once up front. The original called text.include? before
      # text.to_s, so a nil text raised NoMethodError despite the guards.
      before, colon, after = text.to_s.partition(':')
      return false if colon.empty?

      digit?(before[-1]) && digit?(after[0])
    end

    private

    # True when +char+ is a single digit character; false for nil.
    def digit?(char)
      !char.nil? && !(char =~ /\A\d\z/).nil?
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
module ChatCorrect
  # Splits text into word and punctuation tokens.
  #
  # Before splitting on whitespace the text is rewritten: quote characters
  # become the placeholder symbols ∫ ∬ ∯ ∮, apostrophes inside contractions
  # become ƪ, and the separators inside numbers become ☌ (comma) and
  # ☊ (period). These placeholders appear in the returned tokens.
  class Tokenize
    # Lower-case abbreviations (without the final period) whose trailing
    # period is kept attached when they occur mid-text.
    ABBREVIATIONS = [
      'adj', 'adm', 'adv', 'al', 'ala', 'alta', 'apr', 'arc', 'ariz', 'ark', 'art', 'assn', 'asst', 'attys',
      'aug', 'ave', 'bart', 'bld', 'bldg', 'blvd', 'brig', 'bros', 'cal', 'calif', 'capt', 'cl', 'cmdr', 'co',
      'col', 'colo', 'comdr', 'con', 'conn', 'corp', 'cpl', 'cres', 'ct', 'd.phil', 'dak', 'dec', 'del', 'dept',
      'det', 'dist', 'dr', 'dr.phil', 'dr.philos', 'drs', 'e.g', 'ens', 'esp', 'esq', 'etc', 'exp', 'expy',
      'ext', 'feb', 'fed', 'fla', 'ft', 'fwy', 'fy', 'ga', 'gen', 'gov', 'hon', 'hosp', 'hr', 'hway', 'hwy',
      'i.e', 'ia', 'id', 'ida', 'ill', 'inc', 'ind', 'ing', 'insp', 'is', 'jan', 'jr', 'jul', 'jun', 'kan',
      'kans', 'ken', 'ky', 'la', 'lt', 'ltd', 'maj', 'man', 'mar', 'mass', 'may', 'md', 'me', 'messrs', 'mex',
      'mfg', 'mich', 'min', 'minn', 'miss', 'mlle', 'mm', 'mme', 'mo', 'mont', 'mr', 'mrs', 'ms', 'msgr',
      'mssrs', 'mt', 'mtn', 'neb', 'nebr', 'nev', 'no', 'nos', 'nov', 'nr', 'oct', 'ok', 'okla', 'ont', 'op',
      'ord', 'ore', 'p', 'pa', 'pd', 'pde', 'penn', 'penna', 'pfc', 'ph', 'ph.d', 'pl', 'plz', 'pp', 'prof',
      'pvt', 'que', 'rd', 'ref', 'rep', 'reps', 'res', 'rev', 'rt', 'sask', 'sen', 'sens', 'sep', 'sept',
      'sfc', 'sgt', 'sr', 'st', 'supt', 'surg', 'tce', 'tenn', 'tex', 'univ', 'usafa', 'u.s', 'ut', 'va', 'v',
      'ver', 'vs', 'vt', 'wash', 'wis', 'wisc', 'wy', 'wyo', 'yuk'
    ].freeze

    # Tokens removed by #tokenize_no_punct.
    #
    # NOTE(review): this list previously held ASCII '.', '!' and '?' twice
    # each. The duplicates look like full-width forms (U+FF0E, U+FF01,
    # U+FF1F) that were folded to ASCII at some point; they are restored
    # here as explicit escapes. Confirm against the upstream source.
    PUNCTUATION = [
      '。', "\uFF0E", '.', "\uFF01", '!', '?', "\uFF1F", '、', '¡', '¿', '„', '“', '[', ']', '"', '#', '$',
      '%', '&', '(', ')', '*', '+', ',', ':', ';', '<', '=', '>', '@', '^', '_', '`', "'", '{', '|', '}', '~',
      '-'
    ].freeze

    # Marks split off the end of a token by #separate_other_ending_punc.
    #
    # The ASCII period is deliberately absent: #separate_full_stop has
    # already handled it, and listing it here would strip the period that
    # step keeps on abbreviations such as "Mr.". Each mark appears once; a
    # duplicated entry makes the matching token be emitted twice.
    # NOTE(review): full-width forms presumed, as for PUNCTUATION above.
    ENDING_PUNCTUATION = ['。', "\uFF0E", "\uFF01", '!', "\uFF1F", '?'].freeze

    attr_reader :text

    # @param text [String, nil] the text to tokenize
    def initialize(text:)
      @text = text
    end

    # @return [Array<String>, nil] the tokens, or nil when +text+ is nil.
    #   Text made of a single run of word characters is returned unchanged
    #   as a one-element array.
    def tokenize
      return if text.nil?
      return [text] if /\A\w+\z/ =~ text

      prepared = convert_quotes(text)
      prepared = shift_all_punct(prepared)
      prepared = convert_contractions(prepared)
      prepared = convert_numbers_with_commas(prepared)
      prepared = convert_numbers_with_periods(prepared)
      separate_other_ending_punc(separate_full_stop(prepared.split(' '))).map do |token|
        token.delete("\n\r").strip
      end
    end

    # @return [Array<String>, nil] the tokens of #tokenize with every member
    #   of PUNCTUATION removed, or nil when +text+ is nil.
    def tokenize_no_punct
      # Run the pipeline once; the original called #tokenize twice.
      tokens = tokenize
      tokens && tokens - PUNCTUATION
    end

    private

    # Surrounds punctuation with spaces so that String#split isolates it.
    def shift_all_punct(txt)
      shifted = shift_multiple_dash(txt)
      shifted = shift_comma(shifted)
      shifted = shift_ellipse(shifted)
      shifted = shift_bracket(shifted)
      shifted = shift_other_punct(shifted)
      shifted = shift_upsidedown_question_mark(shifted)
      shifted = shift_upsidedown_exclamation(shifted)
      shift_special_quotes(shifted)
    end

    # Replaces quote characters with placeholder symbols. The steps are
    # order-dependent: each one consumes quotes the previous ones left.
    def convert_quotes(txt)
      txt.gsub(/`(?!`)(?=.*\w)/, ' ∫ ')
         .gsub(/"(?=.*\w)/, ' ∬ ')
         .gsub(/(\W|^)'(?=.*\w)(?!twas)(?!Twas)/, '\1 ∫ ')
         # Only the 'twas / 'Twas apostrophes excluded above reach this step.
         # The character captured before the apostrophe is kept; the original
         # discarded it, fusing "It 'twas" into "Itƪtwas".
         .gsub(/(\W|^)'(?=.*\w)/, '\1ƪ')
         .gsub(/"/, ' ∯ ')
         .gsub(/(\w|\D)'(?!')(?=\W|$)/, '\1 ∮ ')
         .squeeze(' ').strip
    end

    # Collapses a run of two or more dashes into a single spaced dash.
    def shift_multiple_dash(txt)
      txt.gsub(/--+/, ' - ').squeeze(' ')
    end

    # Spaces out commas, except those directly followed by a digit.
    def shift_comma(txt)
      txt.gsub(/,(?!\d)/, ' , ').squeeze(' ')
    end

    def shift_upsidedown_question_mark(txt)
      txt.gsub(/¿/, ' ¿ ')
    end

    def shift_upsidedown_exclamation(txt)
      txt.gsub(/¡/, ' ¡ ')
    end

    # Spaces out runs of three or more periods, keeping the run intact.
    def shift_ellipse(txt)
      txt.gsub(/(\.\.\.+)/, ' \1 ').squeeze(' ').strip
    end

    def shift_special_quotes(txt)
      txt.gsub(/«/, ' « ').gsub(/»/, ' » ')
         .gsub(/„/, ' „ ').gsub(/“/, ' “ ')
    end

    def shift_bracket(txt)
      txt.gsub(/([\(\[\{\}\]\)])/, ' \1 ').squeeze(' ').strip
    end

    # Spaces out '!', '?', '%', ';' and '|' when they are followed by
    # whitespace, after first breaking up doubled and mixed '?'/'!' pairs.
    def shift_other_punct(txt)
      shifted = shift_off_double_quotation_mark(txt)
      shifted = shift_off_double_exclamation(shifted)
      shifted = shift_off_double_mixed_1(shifted)
      shifted = shift_off_double_mixed_2(shifted)
      shifted.gsub(/([\!\?\%;|])\s+/, ' \1 ').squeeze(' ').strip
    end

    # Despite its name this handles question marks: when the text contains
    # "??", every '?' followed by whitespace is spaced out.
    def shift_off_double_quotation_mark(txt)
      txt.include?('??') ? txt.gsub(/(\?)\s+/, ' \1 ') : txt
    end

    # When the text contains "!!", every '!' followed by whitespace is
    # spaced out.
    def shift_off_double_exclamation(txt)
      txt.include?('!!') ? txt.gsub(/(!)\s+/, ' \1 ') : txt
    end

    def shift_off_double_mixed_1(txt)
      txt.gsub('?!', ' ? ! ')
    end

    def shift_off_double_mixed_2(txt)
      txt.gsub('!?', ' ! ? ')
    end

    # Replaces the apostrophe in 'd / 'm / 's, n't and 've / 'll / 're with ƪ.
    def convert_contractions(txt)
      txt.gsub(/([A-Za-z])'([dms])\b/, '\1ƪ\2')
         .gsub(/n't\b/, 'nƪt')
         .gsub(/'(ve|ll|re)\b/, 'ƪ\1')
    end

    # Replaces a comma between two digits with ☌.
    def convert_numbers_with_commas(txt)
      txt.gsub(/(?<=\d),(?=\d)/, '☌')
    end

    # Replaces a period between two digits with ☊.
    def convert_numbers_with_periods(txt)
      txt.gsub(/(?<=\d)\.(?=\d)/, '☊')
    end

    # Splits a mark from ENDING_PUNCTUATION off the end of each token that
    # is longer than one character.
    #
    # NOTE(review): String#split is kept from the original, so interior
    # occurrences of the mark are dropped ("a!b!" -> "a", "b", "!"), and a
    # token consisting only of the mark ("!!") yields the mark followed by
    # the untouched token. Confirm whether either is intended.
    def separate_other_ending_punc(array)
      array.each_with_object([]) do |token, result|
        mark = ENDING_PUNCTUATION.find { |candidate| token.length > 1 && token[-1] == candidate }
        pieces = mark ? token.split(mark) : []
        result.concat(pieces)
        result << mark if mark
        result << token if pieces.empty?
      end
    end

    # Splits a trailing ASCII period off each token. For every token except
    # the last, the period is left attached when the rest is an abbreviation;
    # the last token gives up its period whenever a word character
    # precedes it.
    def separate_full_stop(tokens)
      words = []
      tokens.each_with_index do |token, index|
        stem = tokens[index + 1] && token[/\A(.+)\.\z/, 1]
        if stem && !abbreviation?(stem)
          words << stem << '.'
        else
          words << token
        end
      end

      final_stem = words[-1] && words[-1][/\A(.*\w)\.\z/, 1]
      if final_stem
        words[-1] = final_stem
        words << '.'
      end
      words
    end

    # True for a listed abbreviation, a single letter, or a text ending in
    # dotted letters (as in "a.b").
    def abbreviation?(stem)
      ABBREVIATIONS.include?(stem.downcase) ||
        !(stem =~ /\A[a-z]\z/i).nil? ||
        !(stem =~ /[a-z](?:\.[a-z])+\z/i).nil?
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'verbs'
|
2
|
+
|
3
|
+
module ChatCorrect
  # Checks whether some conjugation of a word tagged as a verb occurs in a
  # given text. Conjugation relies on String#verb from the `verbs` gem.
  class Verb
    # Forms that are normalized to 'be' on construction.
    BE_FORMS = ['am', 'been', 'are', 'is', 'was', 'were'].freeze
    # Words that are never reported by #verb_error?.
    EXCLUDED_WORDS = ['a', 'an', 'to'].freeze

    TENSES       = [:past, :present, :future].freeze
    PERSONS      = [:first, :second, :third].freeze
    PLURALITIES  = [:singular, :plural].freeze
    ASPECTS      = [:habitual, :perfect, :perfective, :progressive, :prospective].freeze
    MOODS        = [:indicative, :imperative, :subjunctive].freeze

    attr_reader :word, :pos, :text

    # @param word [String] the word to inspect; any of BE_FORMS is stored
    #   as 'be'
    # @param pos [String] part-of-speech tag for +word+
    # @param text [String] text that is searched for conjugated forms
    def initialize(word:, pos:, text:)
      @word = BE_FORMS.include?(word) ? 'be' : word
      @pos = pos
      @text = text
    end

    # @return [Boolean] true when +word+ is not one of EXCLUDED_WORDS, is
    #   not punctuation-only, does not contain 'ƪ', +pos+ starts with "v"
    #   (case-insensitive), and some conjugation of the word occurs in +text+.
    def verb_error?
      return false if EXCLUDED_WORDS.include?(word)
      return false if word.gsub(/[[:punct:]]/, '').empty? || word.include?('ƪ')
      return false unless pos.downcase[0].eql?('v')

      check_conjugated_word(word)
    end

    private

    # Naive stemmer: "-ied" becomes "-y", otherwise a final "ed" or a
    # final "s" is dropped; any other word is returned unchanged.
    def get_verb_infinitive(word)
      if word.end_with?('ed')
        word[-3] == 'i' ? word[0..-4] + 'y' : word[0..-3]
      elsif word.end_with?('s')
        word[0..-2]
      else
        word
      end
    end

    # Tries every tense/person/plurality/aspect/mood combination and stops
    # at the first conjugated form that is non-empty and occurs in +text+.
    def check_conjugated_word(word)
      # Loop-invariant, so computed once rather than per combination.
      infinitive = get_verb_infinitive(word)

      TENSES.product(PERSONS, PLURALITIES, ASPECTS, MOODS).any? do |tense, person, plurality, aspect, mood|
        # The imperative is only tried in the present tense, second person.
        next false if mood == :imperative && !(tense == :present && person == :second)

        conjugated = infinitive.verb.conjugate(
          tense: tense, person: person, plurality: plurality, aspect: aspect, mood: mood
        )
        # Plain substring search: the original interpolated the conjugated
        # form into a Regexp without escaping it.
        !conjugated.empty? && text.include?(conjugated)
      end
    end
  end
end
|
data/lib/chat_correct.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "chat_correct/version"
|
2
|
+
require "chat_correct/correct"
|
3
|
+
require "chat_correct/contraction"
|
4
|
+
require "chat_correct/punctuation_masquerading_as_spelling_error"
|
5
|
+
require "chat_correct/possessive"
|
6
|
+
require "chat_correct/common_verb_mistake"
|
7
|
+
require "chat_correct/spelling"
|
8
|
+
require "chat_correct/capitalization"
|
9
|
+
require "chat_correct/pluralization"
|
10
|
+
require "chat_correct/verb"
|
11
|
+
require "chat_correct/combine_multi_word_verbs"
|
12
|
+
require "chat_correct/tokenize"
|
13
|
+
require "chat_correct/time"
|
14
|
+
require "chat_correct/punctuation"
|
15
|
+
require "chat_correct/corrections_hash"
|
16
|
+
require "chat_correct/mistake_analyzer"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for ChatCorrect::Capitalization#capitalization_error?.
RSpec.describe ChatCorrect::Capitalization do
  it 'returns true if a capitalization error is found' do
    checker = described_class.new(token_a: 'Hello', token_b: 'hello')
    expect(checker.capitalization_error?).to eq(true)
  end

  it 'returns false if a capitalization error is not found' do
    checker = described_class.new(token_a: 'Hello', token_b: 'Hello')
    expect(checker.capitalization_error?).to eq(false)
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for ChatCorrect::CombineMultiWordVerbs#combine. Each example has
# its own description so that a failure identifies the sentence involved.
RSpec.describe ChatCorrect::CombineMultiWordVerbs do
  it 'combines "would have gone" into one token' do
    combiner = described_class.new(text: 'I would have gone to the store.')
    expect(combiner.combine).to eq(["I", "would have gone", "to", "the", "store", "."])
  end

  it 'combines "will go" into one token' do
    combiner = described_class.new(text: 'I will go to the store.')
    expect(combiner.combine).to eq(["I", "will go", "to", "the", "store", "."])
  end

  it 'combines "didnƪt realize" and "had changed" but leaves "should" alone' do
    combiner = described_class.new(text: "He didn't realize that he should had changed the locks.")
    expect(combiner.combine).to eq(["He", "didnƪt realize", "that", "he", "should", "had changed", "the", "locks", "."])
  end

  it 'combines "hadnƪt realized" and "have changed" but leaves "should" alone' do
    combiner = described_class.new(text: "He hadn't realized that he should have changed the locks.")
    expect(combiner.combine).to eq(["He", "hadnƪt realized", "that", "he", "should", "have changed", "the", "locks", "."])
  end

  it 'combines "was not going" into one token' do
    combiner = described_class.new(text: "I was not going to the party .")
    expect(combiner.combine).to eq(["I", "was not going", "to", "the", "party", "."])
  end

  it 'combines "did not go" into one token' do
    combiner = described_class.new(text: "I did not go to the party .")
    expect(combiner.combine).to eq(["I", "did not go", "to", "the", "party", "."])
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for ChatCorrect::CommonVerbMistake#exists?.
RSpec.describe ChatCorrect::CommonVerbMistake do
  it 'returns true if a common verb mistake is found #001' do
    checker = described_class.new(token_a: 'freezed', token_b: 'froze')
    expect(checker.exists?).to eq(true)
  end

  it 'returns true if a common verb mistake is found #002' do
    checker = described_class.new(token_a: 'froze', token_b: 'freezed')
    expect(checker.exists?).to eq(true)
  end

  # Previously described as "returns true ... #003" although it asserts false.
  it 'returns false if a common verb mistake is not found #003' do
    checker = described_class.new(token_a: 'cooked', token_b: 'cooks')
    expect(checker.exists?).to eq(false)
  end
end
|
@@ -0,0 +1,259 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for ChatCorrect::Contraction#contraction?. Every example passes the
# two expanded tokens plus the candidate contraction and checks the verdict.
RSpec.describe ChatCorrect::Contraction do
  # Builds the object under test and returns its verdict, so each example
  # states only its inputs and the expected result.
  def contraction?(token_a, token_b, contraction)
    described_class.new(token_a: token_a, token_b: token_b, contraction: contraction).contraction?
  end

  it 'handles nil values' do
    expect(contraction?(nil, nil, nil)).to eq(false)
  end

  context 'contractions including not' do
    it 'returns true for "am not" and "ainƪt" (placeholder apostrophe)' do
      expect(contraction?('am', 'not', 'ainƪt')).to eq(true)
    end

    it %q(returns true for "am not" and "ain't") do
      expect(contraction?('am', 'not', "ain't")).to eq(true)
    end

    it %q(returns true for "is not" and "isn't") do
      expect(contraction?('is', 'not', "isn't")).to eq(true)
    end

    it %q(returns true for "should not" and "shouldn't") do
      expect(contraction?('should', 'not', "shouldn't")).to eq(true)
    end

    it %q(returns false for "is not" and the malformed "is't") do
      expect(contraction?('is', 'not', "is't")).to eq(false)
    end
  end

  context 'contractions including us' do
    it %q(returns true for "let us" and "let's") do
      expect(contraction?('let', 'us', "let's")).to eq(true)
    end
  end

  context 'contractions including am' do
    it %q(returns true for "I am" and "I'm") do
      expect(contraction?('I', 'am', "I'm")).to eq(true)
    end
  end

  context 'contractions including are' do
    it %q(returns true for "You are" and "You're") do
      expect(contraction?('You', 'are', "You're")).to eq(true)
    end

    it %q(returns false for "You are" and the malformed "you'rre") do
      expect(contraction?('You', 'are', "you'rre")).to eq(false)
    end
  end

  context 'contractions including is, does, has' do
    it %q(returns true for "she is" and "she's") do
      expect(contraction?('she', 'is', "she's")).to eq(true)
    end

    it %q(returns true for "she does" and "she's") do
      expect(contraction?('she', 'does', "she's")).to eq(true)
    end

    it %q(returns true for "she has" and "she's") do
      expect(contraction?('she', 'has', "she's")).to eq(true)
    end

    it %q(returns false for "she has" and the malformed "she'ss") do
      expect(contraction?('she', 'has', "she'ss")).to eq(false)
    end
  end

  context 'contractions including have' do
    it %q(returns true for "You have" and "You've") do
      expect(contraction?('You', 'have', "You've")).to eq(true)
    end
  end

  context 'contractions including had, did, would' do
    it %q(returns true for "she had" and "she'd") do
      expect(contraction?('she', 'had', "she'd")).to eq(true)
    end

    it %q(returns true for "she did" and "she'd") do
      expect(contraction?('she', 'did', "she'd")).to eq(true)
    end

    it %q(returns true for "she would" and "she'd") do
      expect(contraction?('she', 'would', "she'd")).to eq(true)
    end
  end

  context 'contractions including will' do
    it %q(returns true for "You will" and "You'll") do
      expect(contraction?('You', 'will', "You'll")).to eq(true)
    end
  end

  context 'contractions including of' do
    it 'returns true if it is a contraction #001' do
      expect(contraction?('of', 'monkeys', "o' monkeys")).to eq(true)
    end

    it 'returns true if it is a contraction #002' do
      expect(contraction?('of', nil, "o'")).to eq(true)
    end
  end

  context 'contractions including it' do
    it %q(returns true for "It was" and "'Twas") do
      expect(contraction?('It', 'was', "'Twas")).to eq(true)
    end
  end

  context 'contractions including them' do
    it %q(returns true for "leave them" and "leave 'em") do
      expect(contraction?('leave', 'them', "leave 'em")).to eq(true)
    end
  end

  context 'irregular contractions' do
    it 'returns true if it is a contraction #001' do
      expect(contraction?('is', 'not', "ain't")).to eq(true)
    end

    it 'returns true if it is a contraction #002' do
      expect(contraction?('madam', nil, "ma'am")).to eq(true)
    end

    it 'returns true if it is a contraction #003' do
      expect(contraction?('never-do-well', nil, "ne'er-do-well")).to eq(true)
    end

    it 'returns true if it is a contraction #004' do
      expect(contraction?('cat-of-nine-tails', nil, "cat-o'-nine-tails")).to eq(true)
    end

    it 'returns true if it is a contraction #005' do
      expect(contraction?('jack-of-the-lantern', nil, "jack-o'-lantern")).to eq(true)
    end

    it 'returns true if it is a contraction #006' do
      expect(contraction?('will-of-the-wisp', nil, "will-o'-the-wisp")).to eq(true)
    end
  end
end
|