chat_correct 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +208 -0
- data/Rakefile +4 -0
- data/chat_correct.gemspec +28 -0
- data/lib/chat_correct/capitalization.rb +13 -0
- data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
- data/lib/chat_correct/common_verb_mistake.rb +62 -0
- data/lib/chat_correct/contraction.rb +103 -0
- data/lib/chat_correct/correct.rb +352 -0
- data/lib/chat_correct/corrections_hash.rb +204 -0
- data/lib/chat_correct/mistake_analyzer.rb +40 -0
- data/lib/chat_correct/pluralization.rb +22 -0
- data/lib/chat_correct/possessive.rb +25 -0
- data/lib/chat_correct/punctuation.rb +17 -0
- data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
- data/lib/chat_correct/spelling.rb +20 -0
- data/lib/chat_correct/time.rb +14 -0
- data/lib/chat_correct/tokenize.rb +164 -0
- data/lib/chat_correct/verb.rb +65 -0
- data/lib/chat_correct/version.rb +3 -0
- data/lib/chat_correct.rb +16 -0
- data/spec/chat_correct/capitalization_spec.rb +17 -0
- data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
- data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
- data/spec/chat_correct/contraction_spec.rb +259 -0
- data/spec/chat_correct/correct_spec.rb +1650 -0
- data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
- data/spec/chat_correct/pluralization_spec.rb +31 -0
- data/spec/chat_correct/possessive_spec.rb +31 -0
- data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
- data/spec/chat_correct/punctuation_spec.rb +21 -0
- data/spec/chat_correct/spelling_spec.rb +59 -0
- data/spec/chat_correct/time_spec.rb +21 -0
- data/spec/chat_correct/tokenize_spec.rb +142 -0
- data/spec/chat_correct/verb_spec.rb +60 -0
- data/spec/spec_helper.rb +1 -0
- metadata +201 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::MistakeAnalyzer do
|
4
|
+
context '#no_mistake?' do
|
5
|
+
it 'returns true if there is no mistake' do
|
6
|
+
original = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
|
7
|
+
corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
|
8
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
9
|
+
expect(cc.no_mistake?).to eq(true)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'returns false if there is a mistake' do
|
13
|
+
original = {"token"=>"hello", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
|
14
|
+
corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
|
15
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
16
|
+
expect(cc.no_mistake?).to eq(false)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context '#verb_mistake?' do
|
21
|
+
it 'returns true if there is a verb mistake' do
|
22
|
+
original = {"token"=>"flied", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>5, "position"=>1, "multiple_words"=>false, "lowercase"=>"flied", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
|
23
|
+
corrected = {"token"=>"flew", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"flew", "match_id"=>"c1", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
|
24
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
25
|
+
expect(cc.verb_mistake?).to eq(true)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'returns false if there is not a verb mistake' do
|
29
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
30
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
31
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
32
|
+
expect(cc.verb_mistake?).to eq(false)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
context '#capitalization_mistake?' do
|
37
|
+
it 'returns true if there is a capitalization mistake' do
|
38
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
39
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
40
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
41
|
+
expect(cc.capitalization_mistake?).to eq(true)
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'returns false if there is not a capitalization mistake' do
|
45
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
46
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
47
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
48
|
+
expect(cc.capitalization_mistake?).to eq(false)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#punctuation_mistake?' do
|
53
|
+
it 'returns true if there is a punctuation mistake' do
|
54
|
+
original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
|
55
|
+
corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
|
56
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
57
|
+
expect(cc.punctuation_mistake?).to eq(true)
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns false if there is not a punctuation mistake' do
|
61
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
62
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
63
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
64
|
+
expect(cc.punctuation_mistake?).to eq(false)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
context '#unnecessary_word_missing_punctuation_mistake?' do
|
69
|
+
it 'returns true if there is an unnecessary word / missing punctuation mistake' do
|
70
|
+
original = {"token"=>"when", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"when", "pos_tag"=>"wrb", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
|
71
|
+
corrected = {"token"=>",", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>1, "position"=>1, "multiple_words"=>false, "lowercase"=>",", "match_id"=>"c1", "pos_tag"=>"ppc", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
|
72
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
73
|
+
expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(true)
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'returns false if there is not an unnecessary word / missing punctuation mistake' do
|
77
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
78
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
79
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
80
|
+
expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(false)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context '#spelling_mistake?' do
|
85
|
+
it 'returns true if there is a spelling mistake' do
|
86
|
+
original = {"token"=>"puncttuation", "prev_word1"=>",", "prev_word2"=>"the", "next_word1"=>"are", "next_word2"=>"wrong", "num_char"=>12, "position"=>3, "multiple_words"=>false, "lowercase"=>"puncttuation", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c2"}
|
87
|
+
corrected = {"token"=>"punctuation", "prev_word1"=>"the", "prev_word2"=>"Is", "next_word1"=>"wrong", "next_word2"=>"?", "num_char"=>11, "position"=>2, "multiple_words"=>false, "lowercase"=>"punctuation", "match_id"=>"c2", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected2", "matched"=>true, "is_time"=>false}
|
88
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
89
|
+
expect(cc.spelling_mistake?).to eq(true)
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'returns false if there is not a spelling mistake' do
|
93
|
+
original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
|
94
|
+
corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
|
95
|
+
cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
|
96
|
+
expect(cc.spelling_mistake?).to eq(false)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Pluralization do
|
4
|
+
it 'returns true if a pluralization error is found #001' do
|
5
|
+
token_a = 'chicken'
|
6
|
+
token_b = 'chickens'
|
7
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.pluralization_error?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'returns true if a pluralization error is found #002' do
|
12
|
+
token_a = 'chickens'
|
13
|
+
token_b = 'chicken'
|
14
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.pluralization_error?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns true if a pluralization error is found #003' do
|
19
|
+
token_a = 'goose'
|
20
|
+
token_b = 'geese'
|
21
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.pluralization_error?).to eq(true)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns false if a pluralization error is not found #004' do
|
26
|
+
token_a = 'hears'
|
27
|
+
token_b = 'heard'
|
28
|
+
cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.pluralization_error?).to eq(false)
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Possessive do
|
4
|
+
it 'is a possessive #001' do
|
5
|
+
token_a = 'Johnƪs'
|
6
|
+
token_b = 'John'
|
7
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.possessive?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'is a possessive #002' do
|
12
|
+
token_a = 'John∮s'
|
13
|
+
token_b = 'John'
|
14
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.possessive?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'is a possessive #003' do
|
19
|
+
token_a = 'John'
|
20
|
+
token_b = 'Johnƪs'
|
21
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.possessive?).to eq(true)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'is a possessive #004' do
|
26
|
+
token_a = 'John'
|
27
|
+
token_b = 'John∮s'
|
28
|
+
cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.possessive?).to eq(true)
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::PunctuationMasqueradingAsSpellingError do
|
4
|
+
it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #001' do
|
5
|
+
token_a = 'canƪt'
|
6
|
+
token_b = 'cant'
|
7
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.exists?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #002' do
|
12
|
+
token_a = 'cant'
|
13
|
+
token_b = 'canƪt'
|
14
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.exists?).to eq(true)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns false for regular spelling mistakes' do
|
19
|
+
token_a = 'speeling'
|
20
|
+
token_b = 'spelling'
|
21
|
+
cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.exists?).to eq(false)
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Punctuation do
|
4
|
+
it 'returns true if the text is a punctuation mark #001' do
|
5
|
+
text = '?'
|
6
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
7
|
+
expect(cc.is_punctuation?).to eq(true)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'returns true if the text is a punctuation mark #002' do
|
11
|
+
text = '∯'
|
12
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
13
|
+
expect(cc.is_punctuation?).to eq(true)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'returns false if the text is not a punctuation mark #003' do
|
17
|
+
text = 'hello'
|
18
|
+
cc = ChatCorrect::Punctuation.new(text: text)
|
19
|
+
expect(cc.is_punctuation?).to eq(false)
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Spelling do
|
4
|
+
it 'returns true if a spelling mistake is found #001' do
|
5
|
+
token_a = 'speeling'
|
6
|
+
token_b = 'spelling'
|
7
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
8
|
+
expect(cc.spelling_error?).to eq(true)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'returns false if a spelling mistake is not found #002' do
|
12
|
+
token_a = 'cold'
|
13
|
+
token_b = 'warm'
|
14
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
15
|
+
expect(cc.spelling_error?).to eq(false)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns false if a spelling mistake is not found #003' do
|
19
|
+
token_a = 'the'
|
20
|
+
token_b = 'of'
|
21
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
22
|
+
expect(cc.spelling_error?).to eq(false)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns false if a spelling mistake is not found #004' do
|
26
|
+
token_a = '??'
|
27
|
+
token_b = '???'
|
28
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
29
|
+
expect(cc.spelling_error?).to eq(false)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns true if a spelling mistake is found #005' do
|
33
|
+
token_a = 'original'
|
34
|
+
token_b = 'originnal'
|
35
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
36
|
+
expect(cc.spelling_error?).to eq(true)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'returns false if a spelling mistake is not found #006' do
|
40
|
+
token_a = 'a'
|
41
|
+
token_b = 'b'
|
42
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
43
|
+
expect(cc.spelling_error?).to eq(false)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'returns false if a spelling mistake is not found #007' do
|
47
|
+
token_a = 'that'
|
48
|
+
token_b = 'this'
|
49
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
50
|
+
expect(cc.spelling_error?).to eq(false)
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns false if a spelling mistake is not found #008' do
|
54
|
+
token_a = 'is'
|
55
|
+
token_b = 'Is'
|
56
|
+
cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
|
57
|
+
expect(cc.spelling_error?).to eq(false)
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Time do
|
4
|
+
it 'returns true if the text is a time' do
|
5
|
+
text = "10:25 AM"
|
6
|
+
cc = ChatCorrect::Time.new(text: text)
|
7
|
+
expect(cc.is_time?).to eq(true)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'returns true if the text is a 24-hour time' do
|
11
|
+
text = "23:11"
|
12
|
+
cc = ChatCorrect::Time.new(text: text)
|
13
|
+
expect(cc.is_time?).to eq(true)
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'returns false if the text is not a time' do
|
17
|
+
text = "January 1st: It's the new year."
|
18
|
+
cc = ChatCorrect::Time.new(text: text)
|
19
|
+
expect(cc.is_time?).to eq(false)
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Tokenize do
|
4
|
+
it 'correctly tokenizes test sentence #001' do
|
5
|
+
text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
|
6
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
7
|
+
expect(cc.tokenize).to eq(["Hello", "Ms.", "Piggy", ",", "this", "is", "John", ".", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", ".", "That", "is", "a", "20", "%", "discount", "over", "the", "Nev.", "retailers", ".", "It", "is", "a", "∫", "MUST", "BUY", "∮", ",", "so", "donƪt", "hesistate", "."])
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'correctly tokenizes test sentence #002' do
|
11
|
+
text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
|
12
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
13
|
+
expect(cc.tokenize_no_punct).to eq(["Hello", "Ms.", "Piggy", "this", "is", "John", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", "That", "is", "a", "20", "discount", "over", "the", "Nev.", "retailers", "It", "is", "a", "∫", "MUST", "BUY", "∮", "so", "donƪt", "hesistate"])
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'correctly tokenizes test sentence #003' do
|
17
|
+
text = "Lisa Raines, a lawyer and director of government relations for the Industrial Biotechnical Association, contends that a judge well-versed in patent law and the concerns of research-based industries would have ruled otherwise. And Judge Newman, a former patent lawyer, wrote in her dissent when the court denied a motion for a rehearing of the case by the full court, \'The panel's judicial legislation has affected an important high-technological industry, without regard to the consequences for research and innovation or the public interest.\' Says Ms. Raines, \'[The judgement] confirms our concern that the absence of patent lawyers on the court could prove troublesome.\'"
|
18
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
19
|
+
expect(cc.tokenize).to eq(["Lisa", "Raines", ",", "a", "lawyer", "and", "director", "of", "government", "relations", "for", "the", "Industrial", "Biotechnical", "Association", ",", "contends", "that", "a", "judge", "well-versed", "in", "patent", "law", "and", "the", "concerns", "of", "research-based", "industries", "would", "have", "ruled", "otherwise", ".", "And", "Judge", "Newman", ",", "a", "former", "patent", "lawyer", ",", "wrote", "in", "her", "dissent", "when", "the", "court", "denied", "a", "motion", "for", "a", "rehearing", "of", "the", "case", "by", "the", "full", "court", ",", "∫", "The", "panelƪs", "judicial", "legislation", "has", "affected", "an", "important", "high-technological", "industry", ",", "without", "regard", "to", "the", "consequences", "for", "research", "and", "innovation", "or", "the", "public", "interest", ".", "∫", "Says", "Ms.", "Raines", ",", "∫", "[", "The", "judgement", "]", "confirms", "our", "concern", "that", "the", "absence", "of", "patent", "lawyers", "on", "the", "court", "could", "prove", "troublesome", ".", "∮"])
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'correctly tokenizes test sentence #004' do
|
23
|
+
text = 'Whether there will be eligible to become king to you?'
|
24
|
+
cc = ChatCorrect::Tokenize.new(text: text)
|
25
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "?"])
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'correctly tokenizes test sentence #005' do
|
29
|
+
cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you!')
|
30
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "!"])
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'correctly tokenizes test sentence #006' do
|
34
|
+
cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you.')
|
35
|
+
expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "."])
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'correctly tokenizes test sentence #007' do
|
39
|
+
cc = ChatCorrect::Tokenize.new(text: "\"Whether there will be eligible to become king to you.\"")
|
40
|
+
expect(cc.tokenize).to eq(["∬", "Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", ".", "∯"])
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'correctly tokenizes test sentence #008' do
|
44
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
|
45
|
+
expect(cc.tokenize_no_punct).to eq(['His', 'name', 'is', 'Mr.', 'Smith'])
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'correctly tokenizes test sentence #009' do
|
49
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
|
50
|
+
expect(cc.tokenize).to eq(['His', 'name', 'is', 'Mr.', 'Smith', '.'])
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'correctly tokenizes test sentence #010' do
|
54
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Col. Smith.')
|
55
|
+
expect(cc.tokenize).to eq(['His', 'name', 'is', 'Col.', 'Smith', '.'])
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'correctly tokenizes test sentence #011' do
|
59
|
+
cc = ChatCorrect::Tokenize.new(text: 'She went to East Univ. to get her degree.')
|
60
|
+
expect(cc.tokenize).to eq(['She', 'went', 'to', 'East', 'Univ.', 'to', 'get', 'her', 'degree', '.'])
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'correctly tokenizes test sentence #012' do
|
64
|
+
cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc. on weekends.')
|
65
|
+
expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc.', 'on', 'weekends', '.'])
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'correctly tokenizes test sentence #013' do
|
69
|
+
cc = ChatCorrect::Tokenize.new(text: 'He went to school in Mass. back in the day.')
|
70
|
+
expect(cc.tokenize).to eq(['He', 'went', 'to', 'school', 'in', 'Mass.', 'back', 'in', 'the', 'day', '.'])
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'correctly tokenizes test sentence #014' do
|
74
|
+
cc = ChatCorrect::Tokenize.new(text: 'It is cold in Jan. they say.')
|
75
|
+
expect(cc.tokenize).to eq(['It', 'is', 'cold', 'in', 'Jan.', 'they', 'say', '.'])
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'correctly tokenizes test sentence #015' do
|
79
|
+
cc = ChatCorrect::Tokenize.new(text: '1, 2, 3, etc. is the beat.')
|
80
|
+
expect(cc.tokenize).to eq(['1', ',', '2', ',', '3', ',', 'etc.', 'is', 'the', 'beat', '.'])
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'correctly tokenizes test sentence #016' do
|
84
|
+
cc = ChatCorrect::Tokenize.new(text: 'Alfred E. Stone is a person.')
|
85
|
+
expect(cc.tokenize).to eq(['Alfred', 'E.', 'Stone', 'is', 'a', 'person', '.'])
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'correctly tokenizes test sentence #017' do
|
89
|
+
cc = ChatCorrect::Tokenize.new(text: 'The U.S.A. is a country.')
|
90
|
+
expect(cc.tokenize).to eq(['The', 'U.S.A.', 'is', 'a', 'country', '.'])
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'correctly tokenizes test sentence #018' do
|
94
|
+
cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc.')
|
95
|
+
expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc', '.'])
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'correctly tokenizes test sentence #019' do
|
99
|
+
cc = ChatCorrect::Tokenize.new(text: 'His name is Kevin')
|
100
|
+
expect(cc.tokenize).to eq(%w(His name is Kevin))
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'correctly tokenizes test sentence #020' do
|
104
|
+
cc = ChatCorrect::Tokenize.new(text: 'He paid $10,000,000 for the new house which is equivalent to ¥1,000,000,000.')
|
105
|
+
expect(cc.tokenize).to eq(["He", "paid", "$10☌000☌000", "for", "the", "new", "house", "which", "is", "equivalent", "to", "¥1☌000☌000☌000", "."])
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'correctly tokenizes test sentence #021' do
|
109
|
+
cc = ChatCorrect::Tokenize.new(text: 'Exclamation point requires both marks (Q.E.D.!).')
|
110
|
+
expect(cc.tokenize).to eq(['Exclamation', 'point', 'requires', 'both', 'marks', '(', 'Q.E.D.', '!', ')', '.'])
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'correctly tokenizes test sentence #022' do
|
114
|
+
cc = ChatCorrect::Tokenize.new(text: 'An abbreviation that ends with a period must not be left hanging without it (in parentheses, e.g.), and a sentence containing a parenthesis must itself have terminal punctuation (are we almost done?).')
|
115
|
+
expect(cc.tokenize).to eq(['An', 'abbreviation', 'that', 'ends', 'with', 'a', 'period', 'must', 'not', 'be', 'left', 'hanging', 'without', 'it', '(', 'in', 'parentheses', ',', 'e.g.', ')', ',', 'and', 'a', 'sentence', 'containing', 'a', 'parenthesis', 'must', 'itself', 'have', 'terminal', 'punctuation', '(', 'are', 'we', 'almost', 'done', '?', ')', '.'])
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'correctly tokenizes test sentence #023' do
|
119
|
+
cc = ChatCorrect::Tokenize.new(text: 'his name is mr. smith, king of the \'entire\' forest.')
|
120
|
+
expect(cc.tokenize).to eq(["his", "name", "is", "mr.", "smith", ",", "king", "of", "the", "∫", "entire", "∮", "forest", "."])
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'correctly tokenizes test sentence #024' do
|
124
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out http://www.google.com/?this_is_a_url/hello-world.html for more info.')
|
125
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'http://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'correctly tokenizes test sentence #025' do
|
129
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out https://www.google.com/?this_is_a_url/hello-world.html for more info.')
|
130
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'https://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
131
|
+
end
|
132
|
+
|
133
|
+
it 'correctly tokenizes test sentence #026' do
|
134
|
+
cc = ChatCorrect::Tokenize.new(text: 'Check out www.google.com/?this_is_a_url/hello-world.html for more info.')
|
135
|
+
expect(cc.tokenize).to eq(['Check', 'out', 'www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
|
136
|
+
end
|
137
|
+
|
138
|
+
it 'correctly tokenizes test sentence #027' do
|
139
|
+
cc = ChatCorrect::Tokenize.new(text: 'Please email example@example.com for more info.')
|
140
|
+
expect(cc.tokenize).to eq(['Please', 'email', 'example@example.com', 'for', 'more', 'info', '.'])
|
141
|
+
end
|
142
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe ChatCorrect::Verb do
|
4
|
+
|
5
|
+
it 'returns true if a verb error is found #001' do
|
6
|
+
word = 'was'
|
7
|
+
text = 'He is awesome.'
|
8
|
+
pos = 'v'
|
9
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
10
|
+
expect(cc.verb_error?).to eq(true)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'returns true if a verb error is found #002' do
|
14
|
+
word = 'buy'
|
15
|
+
text = 'He bought some shoes.'
|
16
|
+
pos = 'v'
|
17
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
18
|
+
expect(cc.verb_error?).to eq(true)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'returns false if it is not a verb #003' do
|
22
|
+
word = 'buy'
|
23
|
+
text = 'He bought some shoes.'
|
24
|
+
pos = 'n'
|
25
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
26
|
+
expect(cc.verb_error?).to eq(false)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns false if it is not a verb error #004' do
|
30
|
+
word = 'threw'
|
31
|
+
text = 'He bought some shoes.'
|
32
|
+
pos = 'v'
|
33
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
34
|
+
expect(cc.verb_error?).to eq(false)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns true if a verb error is found #005' do
|
38
|
+
word = 'eat'
|
39
|
+
text = 'I ate dinner.'
|
40
|
+
pos = 'v'
|
41
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
42
|
+
expect(cc.verb_error?).to eq(true)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'returns true if a verb error is found #006' do
|
46
|
+
word = 'win'
|
47
|
+
text = 'I won a medal.'
|
48
|
+
pos = 'v'
|
49
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
50
|
+
expect(cc.verb_error?).to eq(true)
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns true if a verb error is found #007' do
|
54
|
+
word = 'file'
|
55
|
+
text = 'I filed the papers.'
|
56
|
+
pos = 'v'
|
57
|
+
cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
|
58
|
+
expect(cc.verb_error?).to eq(true)
|
59
|
+
end
|
60
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'chat_correct'
|