chat_correct 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +208 -0
- data/Rakefile +4 -0
- data/chat_correct.gemspec +28 -0
- data/lib/chat_correct/capitalization.rb +13 -0
- data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
- data/lib/chat_correct/common_verb_mistake.rb +62 -0
- data/lib/chat_correct/contraction.rb +103 -0
- data/lib/chat_correct/correct.rb +352 -0
- data/lib/chat_correct/corrections_hash.rb +204 -0
- data/lib/chat_correct/mistake_analyzer.rb +40 -0
- data/lib/chat_correct/pluralization.rb +22 -0
- data/lib/chat_correct/possessive.rb +25 -0
- data/lib/chat_correct/punctuation.rb +17 -0
- data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
- data/lib/chat_correct/spelling.rb +20 -0
- data/lib/chat_correct/time.rb +14 -0
- data/lib/chat_correct/tokenize.rb +164 -0
- data/lib/chat_correct/verb.rb +65 -0
- data/lib/chat_correct/version.rb +3 -0
- data/lib/chat_correct.rb +16 -0
- data/spec/chat_correct/capitalization_spec.rb +17 -0
- data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
- data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
- data/spec/chat_correct/contraction_spec.rb +259 -0
- data/spec/chat_correct/correct_spec.rb +1650 -0
- data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
- data/spec/chat_correct/pluralization_spec.rb +31 -0
- data/spec/chat_correct/possessive_spec.rb +31 -0
- data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
- data/spec/chat_correct/punctuation_spec.rb +21 -0
- data/spec/chat_correct/spelling_spec.rb +59 -0
- data/spec/chat_correct/time_spec.rb +21 -0
- data/spec/chat_correct/tokenize_spec.rb +142 -0
- data/spec/chat_correct/verb_spec.rb +60 -0
- data/spec/spec_helper.rb +1 -0
- metadata +201 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::MistakeAnalyzer do
|
4
|
+
context '#no_mistake?' do
|
5
|
+
it 'returns true if there is no mistake' do
|
6
|
+
original = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
|
7
|
+
corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
|
8
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
9
|
+
expect(cc.no_mistake?).to eq(true)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'returns false if there is a mistake' do
|
13
|
+
original = {"token"=>"hello", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
|
14
|
+
corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
|
15
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
16
|
+
expect(cc.no_mistake?).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context '#verb_mistake?' do
|
21
|
+
it 'returns true if there is a verb mistake' do
|
22
|
+
original = {"token"=>"flied", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>5, "position"=>1, "multiple_words"=>false, "lowercase"=>"flied", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
|
23
|
+
corrected = {"token"=>"flew", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"flew", "match_id"=>"c1", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
|
24
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
25
|
+
expect(cc.verb_mistake?).to eq(true)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'returns false if there is not a verb mistake' do
|
29
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
30
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
31
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
32
|
+
expect(cc.verb_mistake?).to eq(false)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
context '#capitalization_mistake?' do
|
37
|
+
it 'returns true if there is a capitalization mistake' do
|
38
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
39
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
40
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
41
|
+
expect(cc.capitalization_mistake?).to eq(true)
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'returns false if there is not a capitalization mistake' do
|
45
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
46
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
47
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
48
|
+
expect(cc.capitalization_mistake?).to eq(false)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#punctuation_mistake?' do
|
53
|
+
it 'returns true if there is a punctuation mistake' do
|
54
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
55
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
56
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
57
|
+
expect(cc.punctuation_mistake?).to eq(true)
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns false if there is not a punctuation mistake' do
|
61
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
62
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
63
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
64
|
+
expect(cc.punctuation_mistake?).to eq(false)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context '#unnecessary_word_missing_punctuation_mistake?' do
|
69
|
+
it 'returns true if there is an unnecessary word / missing punctuation mistake' do
|
70
|
+
original = {"token"=>"when", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"when", "pos_tag"=>"wrb", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
|
71
|
+
corrected = {"token"=>",", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>1, "position"=>1, "multiple_words"=>false, "lowercase"=>",", "match_id"=>"c1", "pos_tag"=>"ppc", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
|
72
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
73
|
+
expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(true)
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'returns false if there is not an unnecessary word / missing punctuation mistake' do
|
77
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
78
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
79
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
80
|
+
expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(false)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context '#spelling_mistake?' do
|
85
|
+
it 'returns true if there is a spelling mistake' do
|
86
|
+
original = {"token"=>"puncttuation", "prev_word1"=>",", "prev_word2"=>"the", "next_word1"=>"are", "next_word2"=>"wrong", "num_char"=>12, "position"=>3, "multiple_words"=>false, "lowercase"=>"puncttuation", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c2"}
|
87
|
+
corrected = {"token"=>"punctuation", "prev_word1"=>"the", "prev_word2"=>"Is", "next_word1"=>"wrong", "next_word2"=>"?", "num_char"=>11, "position"=>2, "multiple_words"=>false, "lowercase"=>"punctuation", "match_id"=>"c2", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected2", "matched"=>true, "is_time"=>false}
|
88
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
89
|
+
expect(cc.spelling_mistake?).to eq(true)
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'returns false if there is not a spelling mistake' do
|
93
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
94
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
95
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
96
|
+
expect(cc.spelling_mistake?).to eq(false)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Pluralization do
|
4
|
+
it 'returns true if a pluralization error is found #001' do
|
5
|
+
token_a = 'chicken'
|
6
|
+
token_b = 'chickens'
|
7
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.pluralization_error?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'returns true if a pluralization error is found #002' do
|
12
|
+
token_a = 'chickens'
|
13
|
+
token_b = 'chicken'
|
14
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.pluralization_error?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns true if a pluralization error is found #003' do
|
19
|
+
token_a = 'goose'
|
20
|
+
token_b = 'geese'
|
21
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.pluralization_error?).to eq(true)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns false if a pluralization error is not found #004' do
|
26
|
+
token_a = 'hears'
|
27
|
+
token_b = 'heard'
|
28
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.pluralization_error?).to eq(false)
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Possessive do
|
4
|
+
it 'is a possessive #001' do
|
5
|
+
token_a = 'Johnƪs'
|
6
|
+
token_b = 'John'
|
7
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.possessive?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'is a possessive #002' do
|
12
|
+
token_a = 'John∮s'
|
13
|
+
token_b = 'John'
|
14
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.possessive?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'is a possessive #003' do
|
19
|
+
token_a = 'John'
|
20
|
+
token_b = 'Johnƪs'
|
21
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.possessive?).to eq(true)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'is a possessive #004' do
|
26
|
+
token_a = 'John'
|
27
|
+
token_b = 'John∮s'
|
28
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.possessive?).to eq(true)
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::PunctuationMasqueradingAsSpellingError do
|
4
|
+
it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #001' do
|
5
|
+
token_a = 'canƪt'
|
6
|
+
token_b = 'cant'
|
7
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.exists?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #002' do
|
12
|
+
token_a = 'cant'
|
13
|
+
token_b = 'canƪt'
|
14
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.exists?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns false for regular spelling mistakes' do
|
19
|
+
token_a = 'speeling'
|
20
|
+
token_b = 'spelling'
|
21
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.exists?).to eq(false)
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Punctuation do
|
4
|
+
it 'returns true if the text is a punctuation mark #001' do
|
5
|
+
text = '?'
|
6
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
7
|
+
expect(cc.is_punctuation?).to eq(true)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'returns true if the text is a punctuation mark #002' do
|
11
|
+
text = '∯'
|
12
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
13
|
+
expect(cc.is_punctuation?).to eq(true)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'returns false if the text is not a punctuation mark #003' do
|
17
|
+
text = 'hello'
|
18
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
19
|
+
expect(cc.is_punctuation?).to eq(false)
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Spelling do
|
4
|
+
it 'returns true if a spelling mistake is found #001' do
|
5
|
+
token_a = 'speeling'
|
6
|
+
token_b = 'spelling'
|
7
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.spelling_error?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'returns false if a spelling mistake is not found #002' do
|
12
|
+
token_a = 'cold'
|
13
|
+
token_b = 'warm'
|
14
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.spelling_error?).to eq(false)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns false if a spelling mistake is not found #003' do
|
19
|
+
token_a = 'the'
|
20
|
+
token_b = 'of'
|
21
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.spelling_error?).to eq(false)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns false if a spelling mistake is not found #004' do
|
26
|
+
token_a = '??'
|
27
|
+
token_b = '???'
|
28
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.spelling_error?).to eq(false)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns true if a spelling mistake is found #005' do
|
33
|
+
token_a = 'original'
|
34
|
+
token_b = 'originnal'
|
35
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
36
|
+
expect(cc.spelling_error?).to eq(true)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'returns false if a spelling mistake is not found #006' do
|
40
|
+
token_a = 'a'
|
41
|
+
token_b = 'b'
|
42
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
43
|
+
expect(cc.spelling_error?).to eq(false)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'returns false if a spelling mistake is not found #007' do
|
47
|
+
token_a = 'that'
|
48
|
+
token_b = 'this'
|
49
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
50
|
+
expect(cc.spelling_error?).to eq(false)
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns false if a spelling mistake is not found #008' do
|
54
|
+
token_a = 'is'
|
55
|
+
token_b = 'Is'
|
56
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
57
|
+
expect(cc.spelling_error?).to eq(false)
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Time do
|
4
|
+
it 'returns true if the text is a time' do
|
5
|
+
text = "10:25 AM"
|
6
|
+
cc = ChatCorrect::Time.new(text: text)
|
7
|
+
expect(cc.is_time?).to eq(true)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'returns true if the text is a time in 24-hour format' do
|
11
|
+
text = "23:11"
|
12
|
+
cc = ChatCorrect::Time.new(text: text)
|
13
|
+
expect(cc.is_time?).to eq(true)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'returns false if the text is not a time' do
|
17
|
+
text = "January 1st: It's the new year."
|
18
|
+
cc = ChatCorrect::Time.new(text: text)
|
19
|
+
expect(cc.is_time?).to eq(false)
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Tokenize do
|
4
|
+
it 'correctly tokenizes test sentence #001' do
|
5
|
+
text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
|
6
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
7
|
+
expect(cc.tokenize).to eq(["Hello", "Ms.", "Piggy", ",", "this", "is", "John", ".", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", ".", "That", "is", "a", "20", "%", "discount", "over", "the", "Nev.", "retailers", ".", "It", "is", "a", "∫", "MUST", "BUY", "∮", ",", "so", "donƪt", "hesistate", "."])
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'correctly tokenizes test sentence #002' do
|
11
|
+
text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
|
12
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
13
|
+
expect(cc.tokenize_no_punct).to eq(["Hello", "Ms.", "Piggy", "this", "is", "John", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", "That", "is", "a", "20", "discount", "over", "the", "Nev.", "retailers", "It", "is", "a", "∫", "MUST", "BUY", "∮", "so", "donƪt", "hesistate"])
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'correctly tokenizes test sentence #003' do
|
17
|
+
text = "Lisa Raines, a lawyer and director of government relations for the Industrial Biotechnical Association, contends that a judge well-versed in patent law and the concerns of research-based industries would have ruled otherwise. And Judge Newman, a former patent lawyer, wrote in her dissent when the court denied a motion for a rehearing of the case by the full court, \'The panel's judicial legislation has affected an important high-technological industry, without regard to the consequences for research and innovation or the public interest.\' Says Ms. Raines, \'[The judgement] confirms our concern that the absence of patent lawyers on the court could prove troublesome.\'"
|
18
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
19
|
+
expect(cc.tokenize).to eq(["Lisa", "Raines", ",", "a", "lawyer", "and", "director", "of", "government", "relations", "for", "the", "Industrial", "Biotechnical", "Association", ",", "contends", "that", "a", "judge", "well-versed", "in", "patent", "law", "and", "the", "concerns", "of", "research-based", "industries", "would", "have", "ruled", "otherwise", ".", "And", "Judge", "Newman", ",", "a", "former", "patent", "lawyer", ",", "wrote", "in", "her", "dissent", "when", "the", "court", "denied", "a", "motion", "for", "a", "rehearing", "of", "the", "case", "by", "the", "full", "court", ",", "∫", "The", "panelƪs", "judicial", "legislation", "has", "affected", "an", "important", "high-technological", "industry", ",", "without", "regard", "to", "the", "consequences", "for", "research", "and", "innovation", "or", "the", "public", "interest", ".", "∫", "Says", "Ms.", "Raines", ",", "∫", "[", "The", "judgement", "]", "confirms", "our", "concern", "that", "the", "absence", "of", "patent", "lawyers", "on", "the", "court", "could", "prove", "troublesome", ".", "∮"])
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'correctly tokenizes test sentence #004' do
|
23
|
+
text = 'Whether there will be eligible to become king to you?'
|
24
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
25
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "?"])
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'correctly tokenizes test sentence #005' do
|
29
|
+
cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you!')
|
30
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "!"])
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'correctly tokenizes test sentence #006' do
|
34
|
+
cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you.')
|
35
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "."])
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'correctly tokenizes test sentence #007' do
|
39
|
+
cc = ChatCorrect::Tokenize.new(text: "\"Whether there will be eligible to become king to you.\"")
|
40
|
+
expect(cc.tokenize).to eq(["∬", "Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", ".", "∯"])
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'correctly tokenizes test sentence #008' do
|
44
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
|
45
|
+
expect(cc.tokenize_no_punct).to eq(['His', 'name', 'is', 'Mr.', 'Smith'])
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'correctly tokenizes test sentence #009' do
|
49
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
|
50
|
+
expect(cc.tokenize).to eq(['His', 'name', 'is', 'Mr.', 'Smith', '.'])
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'correctly tokenizes test sentence #010' do
|
54
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Col. Smith.')
|
55
|
+
expect(cc.tokenize).to eq(['His', 'name', 'is', 'Col.', 'Smith', '.'])
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'correctly tokenizes test sentence #011' do
|
59
|
+
cc = ChatCorrect::Tokenize.new(text: 'She went to East Univ. to get her degree.')
|
60
|
+
expect(cc.tokenize).to eq(['She', 'went', 'to', 'East', 'Univ.', 'to', 'get', 'her', 'degree', '.'])
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'correctly tokenizes test sentence #012' do
|
64
|
+
cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc. on weekends.')
|
65
|
+
expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc.', 'on', 'weekends', '.'])
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'correctly tokenizes test sentence #013' do
|
69
|
+
cc = ChatCorrect::Tokenize.new(text: 'He went to school in Mass. back in the day.')
|
70
|
+
expect(cc.tokenize).to eq(['He', 'went', 'to', 'school', 'in', 'Mass.', 'back', 'in', 'the', 'day', '.'])
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'correctly tokenizes test sentence #014' do
|
74
|
+
cc = ChatCorrect::Tokenize.new(text: 'It is cold in Jan. they say.')
|
75
|
+
expect(cc.tokenize).to eq(['It', 'is', 'cold', 'in', 'Jan.', 'they', 'say', '.'])
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'correctly tokenizes test sentence #015' do
|
79
|
+
cc = ChatCorrect::Tokenize.new(text: '1, 2, 3, etc. is the beat.')
|
80
|
+
expect(cc.tokenize).to eq(['1', ',', '2', ',', '3', ',', 'etc.', 'is', 'the', 'beat', '.'])
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'correctly tokenizes test sentence #016' do
|
84
|
+
cc = ChatCorrect::Tokenize.new(text: 'Alfred E. Stone is a person.')
|
85
|
+
expect(cc.tokenize).to eq(['Alfred', 'E.', 'Stone', 'is', 'a', 'person', '.'])
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'correctly tokenizes test sentence #017' do
|
89
|
+
cc = ChatCorrect::Tokenize.new(text: 'The U.S.A. is a country.')
|
90
|
+
expect(cc.tokenize).to eq(['The', 'U.S.A.', 'is', 'a', 'country', '.'])
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'correctly tokenizes test sentence #018' do
|
94
|
+
cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc.')
|
95
|
+
expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc', '.'])
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'correctly tokenizes test sentence #019' do
|
99
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Kevin')
|
100
|
+
expect(cc.tokenize).to eq(%w(His name is Kevin))
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'correctly tokenizes test sentence #020' do
|
104
|
+
cc = ChatCorrect::Tokenize.new(text: 'He paid $10,000,000 for the new house which is equivalent to ¥1,000,000,000.')
|
105
|
+
expect(cc.tokenize).to eq(["He", "paid", "$10☌000☌000", "for", "the", "new", "house", "which", "is", "equivalent", "to", "¥1☌000☌000☌000", "."])
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'correctly tokenizes test sentence #021' do
|
109
|
+
cc = ChatCorrect::Tokenize.new(text: 'Exclamation point requires both marks (Q.E.D.!).')
|
110
|
+
expect(cc.tokenize).to eq(['Exclamation', 'point', 'requires', 'both', 'marks', '(', 'Q.E.D.', '!', ')', '.'])
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'correctly tokenizes test sentence #022' do
|
114
|
+
cc = ChatCorrect::Tokenize.new(text: 'An abbreviation that ends with a period must not be left hanging without it (in parentheses, e.g.), and a sentence containing a parenthesis must itself have terminal punctuation (are we almost done?).')
|
115
|
+
expect(cc.tokenize).to eq(['An', 'abbreviation', 'that', 'ends', 'with', 'a', 'period', 'must', 'not', 'be', 'left', 'hanging', 'without', 'it', '(', 'in', 'parentheses', ',', 'e.g.', ')', ',', 'and', 'a', 'sentence', 'containing', 'a', 'parenthesis', 'must', 'itself', 'have', 'terminal', 'punctuation', '(', 'are', 'we', 'almost', 'done', '?', ')', '.'])
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'correctly tokenizes test sentence #023' do
|
119
|
+
cc = ChatCorrect::Tokenize.new(text: 'his name is mr. smith, king of the \'entire\' forest.')
|
120
|
+
expect(cc.tokenize).to eq(["his", "name", "is", "mr.", "smith", ",", "king", "of", "the", "∫", "entire", "∮", "forest", "."])
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'correctly tokenizes test sentence #024' do
|
124
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out http://www.google.com/?this_is_a_url/hello-world.html for more info.')
|
125
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'http://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'correctly tokenizes test sentence #025' do
|
129
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out https://www.google.com/?this_is_a_url/hello-world.html for more info.')
|
130
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'https://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
131
|
+
end
|
132
|
+
|
133
|
+
it 'correctly tokenizes test sentence #026' do
|
134
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out www.google.com/?this_is_a_url/hello-world.html for more info.')
|
135
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
136
|
+
end
|
137
|
+
|
138
|
+
it 'correctly tokenizes test sentence #027' do
|
139
|
+
cc = ChatCorrect::Tokenize.new(text: 'Please email example@example.com for more info.')
|
140
|
+
expect(cc.tokenize).to eq(['Please', 'email', 'example@example.com', 'for', 'more', 'info', '.'])
|
141
|
+
end
|
142
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Verb do
|
4
|
+
|
5
|
+
it 'returns true if a verb error is found #001' do
|
6
|
+
word = 'was'
|
7
|
+
text = 'He is awesome.'
|
8
|
+
pos = 'v'
|
9
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
10
|
+
expect(cc.verb_error?).to eq(true)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'returns true if a verb error is found #002' do
|
14
|
+
word = 'buy'
|
15
|
+
text = 'He bought some shoes.'
|
16
|
+
pos = 'v'
|
17
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
18
|
+
expect(cc.verb_error?).to eq(true)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'returns false if it is not a verb #003' do
|
22
|
+
word = 'buy'
|
23
|
+
text = 'He bought some shoes.'
|
24
|
+
pos = 'n'
|
25
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
26
|
+
expect(cc.verb_error?).to eq(false)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns false if it is not a verb error #004' do
|
30
|
+
word = 'threw'
|
31
|
+
text = 'He bought some shoes.'
|
32
|
+
pos = 'v'
|
33
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
34
|
+
expect(cc.verb_error?).to eq(false)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns true if a verb error is found #005' do
|
38
|
+
word = 'eat'
|
39
|
+
text = 'I ate dinner.'
|
40
|
+
pos = 'v'
|
41
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
42
|
+
expect(cc.verb_error?).to eq(true)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'returns true if a verb error is found #006' do
|
46
|
+
word = 'win'
|
47
|
+
text = 'I won a medal.'
|
48
|
+
pos = 'v'
|
49
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
50
|
+
expect(cc.verb_error?).to eq(true)
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns true if a verb error is found #007' do
|
54
|
+
word = 'file'
|
55
|
+
text = 'I filed the papers.'
|
56
|
+
pos = 'v'
|
57
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
58
|
+
expect(cc.verb_error?).to eq(true)
|
59
|
+
end
|
60
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'chat_correct'
|