chat_correct 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +208 -0
  8. data/Rakefile +4 -0
  9. data/chat_correct.gemspec +28 -0
  10. data/lib/chat_correct/capitalization.rb +13 -0
  11. data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
  12. data/lib/chat_correct/common_verb_mistake.rb +62 -0
  13. data/lib/chat_correct/contraction.rb +103 -0
  14. data/lib/chat_correct/correct.rb +352 -0
  15. data/lib/chat_correct/corrections_hash.rb +204 -0
  16. data/lib/chat_correct/mistake_analyzer.rb +40 -0
  17. data/lib/chat_correct/pluralization.rb +22 -0
  18. data/lib/chat_correct/possessive.rb +25 -0
  19. data/lib/chat_correct/punctuation.rb +17 -0
  20. data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
  21. data/lib/chat_correct/spelling.rb +20 -0
  22. data/lib/chat_correct/time.rb +14 -0
  23. data/lib/chat_correct/tokenize.rb +164 -0
  24. data/lib/chat_correct/verb.rb +65 -0
  25. data/lib/chat_correct/version.rb +3 -0
  26. data/lib/chat_correct.rb +16 -0
  27. data/spec/chat_correct/capitalization_spec.rb +17 -0
  28. data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
  29. data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
  30. data/spec/chat_correct/contraction_spec.rb +259 -0
  31. data/spec/chat_correct/correct_spec.rb +1650 -0
  32. data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
  33. data/spec/chat_correct/pluralization_spec.rb +31 -0
  34. data/spec/chat_correct/possessive_spec.rb +31 -0
  35. data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
  36. data/spec/chat_correct/punctuation_spec.rb +21 -0
  37. data/spec/chat_correct/spelling_spec.rb +59 -0
  38. data/spec/chat_correct/time_spec.rb +21 -0
  39. data/spec/chat_correct/tokenize_spec.rb +142 -0
  40. data/spec/chat_correct/verb_spec.rb +60 -0
  41. data/spec/spec_helper.rb +1 -0
  42. metadata +201 -0
@@ -0,0 +1,99 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::MistakeAnalyzer do
4
+ context '#no_mistake?' do
5
+ it 'returns true if there is no mistake' do
6
+ original = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
7
+ corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
8
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
9
+ expect(cc.no_mistake?).to eq(true)
10
+ end
11
+
12
+ it 'returns false if there is a mistake' do
13
+ original = {"token"=>"hello", "prev_word1"=>"go", "prev_word2"=>"need", "next_word1"=>"at", "next_word2"=>"this", "num_char"=>8, "position"=>3, "multiple_words"=>false, "lowercase"=>"shopping", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c4"}
14
+ corrected = {"token"=>"shopping", "prev_word1"=>"go", "prev_word2"=>"to", "next_word1"=>"this", "next_word2"=>"weekend", "num_char"=>8, "position"=>4, "multiple_words"=>false, "lowercase"=>"shopping", "match_id"=>"c4", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected4", "matched"=>true, "is_time"=>false}
15
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
16
+ expect(cc.no_mistake?).to eq(false)
17
+ end
18
+ end
19
+
20
+ context '#verb_mistake?' do
21
+ it 'returns true if there is a verb mistake' do
22
+ original = {"token"=>"flied", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>5, "position"=>1, "multiple_words"=>false, "lowercase"=>"flied", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
23
+ corrected = {"token"=>"flew", "prev_word1"=>"I", "prev_word2"=>"ȸ", "next_word1"=>"home", "next_word2"=>"yesterday", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"flew", "match_id"=>"c1", "pos_tag"=>"vbd", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
24
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
25
+ expect(cc.verb_mistake?).to eq(true)
26
+ end
27
+
28
+ it 'returns false if there is not a verb mistake' do
29
+ original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
30
+ corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
31
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
32
+ expect(cc.verb_mistake?).to eq(false)
33
+ end
34
+ end
35
+
36
+ context '#capitalization_mistake?' do
37
+ it 'returns true if there is a capitalization mistake' do
38
+ original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
39
+ corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
40
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
41
+ expect(cc.capitalization_mistake?).to eq(true)
42
+ end
43
+
44
+ it 'returns false if there is not a capitalization mistake' do
45
+ original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
46
+ corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
47
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
48
+ expect(cc.capitalization_mistake?).to eq(false)
49
+ end
50
+ end
51
+
52
+ context '#punctuation_mistake?' do
53
+ it 'returns true if there is a punctuation mistake' do
54
+ original = {"token"=>"!", "prev_word1"=>"misspeellings", "prev_word2"=>"consecutiveee", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>"!", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"original7", "matched"=>false, "is_time"=>false, "match_id"=>"c7"}
55
+ corrected = {"token"=>".", "prev_word1"=>"misspellings", "prev_word2"=>"consecutive", "next_word1"=>"ȹ", "next_word2"=>"ȹ", "num_char"=>1, "position"=>7, "multiple_words"=>false, "lowercase"=>".", "match_id"=>"c7", "pos_tag"=>"pp", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected7", "matched"=>true, "is_time"=>false}
56
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
57
+ expect(cc.punctuation_mistake?).to eq(true)
58
+ end
59
+
60
+ it 'returns false if there is not a punctuation mistake' do
61
+ original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
62
+ corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
63
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
64
+ expect(cc.punctuation_mistake?).to eq(false)
65
+ end
66
+ end
67
+
68
+ context '#unnecessary_word_missing_punctuation_mistake?' do
69
+ it 'returns true if there is an unnecessary word / missing punctuation mistake' do
70
+ original = {"token"=>"when", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>4, "position"=>1, "multiple_words"=>false, "lowercase"=>"when", "pos_tag"=>"wrb", "punctuation"=>false, "duplicates"=>false, "uid"=>"original1", "matched"=>false, "is_time"=>false, "match_id"=>"c1"}
71
+ corrected = {"token"=>",", "prev_word1"=>"Actually", "prev_word2"=>"ȸ", "next_word1"=>"I", "next_word2"=>"attended", "num_char"=>1, "position"=>1, "multiple_words"=>false, "lowercase"=>",", "match_id"=>"c1", "pos_tag"=>"ppc", "punctuation"=>true, "duplicates"=>false, "uid"=>"corrected1", "matched"=>true, "is_time"=>false}
72
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
73
+ expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(true)
74
+ end
75
+
76
+ it 'returns false if there is not an unnecessary word / missing punctuation mistake' do
77
+ original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
78
+ corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
79
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
80
+ expect(cc.unnecessary_word_missing_punctuation_mistake?).to eq(false)
81
+ end
82
+ end
83
+
84
+ context '#spelling_mistake?' do
85
+ it 'returns true if there is a spelling mistake' do
86
+ original = {"token"=>"puncttuation", "prev_word1"=>",", "prev_word2"=>"the", "next_word1"=>"are", "next_word2"=>"wrong", "num_char"=>12, "position"=>3, "multiple_words"=>false, "lowercase"=>"puncttuation", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"original3", "matched"=>false, "is_time"=>false, "match_id"=>"c2"}
87
+ corrected = {"token"=>"punctuation", "prev_word1"=>"the", "prev_word2"=>"Is", "next_word1"=>"wrong", "next_word2"=>"?", "num_char"=>11, "position"=>2, "multiple_words"=>false, "lowercase"=>"punctuation", "match_id"=>"c2", "pos_tag"=>"nn", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected2", "matched"=>true, "is_time"=>false}
88
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
89
+ expect(cc.spelling_mistake?).to eq(true)
90
+ end
91
+
92
+ it 'returns false if there is not a spelling mistake' do
93
+ original = {"token"=>"is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>",", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "pos_tag"=>"vbz", "punctuation"=>false, "duplicates"=>false, "uid"=>"original0", "matched"=>false, "is_time"=>false, "match_id"=>"c0"}
94
+ corrected = {"token"=>"Is", "prev_word1"=>"ȸ", "prev_word2"=>"ȸ", "next_word1"=>"the", "next_word2"=>"punctuation", "num_char"=>2, "position"=>0, "multiple_words"=>false, "lowercase"=>"is", "match_id"=>"c0", "pos_tag"=>"nnp", "punctuation"=>false, "duplicates"=>false, "uid"=>"corrected0", "matched"=>true, "is_time"=>false}
95
+ cc = ChatCorrect::MistakeAnalyzer.new(original: original, corrected: corrected)
96
+ expect(cc.spelling_mistake?).to eq(false)
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Pluralization do
4
+ it 'returns true if a pluralization error is found #001' do
5
+ token_a = 'chicken'
6
+ token_b = 'chickens'
7
+ cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
8
+ expect(cc.pluralization_error?).to eq(true)
9
+ end
10
+
11
+ it 'returns true if a pluralization error is found #002' do
12
+ token_a = 'chickens'
13
+ token_b = 'chicken'
14
+ cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
15
+ expect(cc.pluralization_error?).to eq(true)
16
+ end
17
+
18
+ it 'returns true if a pluralization error is found #003' do
19
+ token_a = 'goose'
20
+ token_b = 'geese'
21
+ cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
22
+ expect(cc.pluralization_error?).to eq(true)
23
+ end
24
+
25
+ it 'returns false if a pluralization error is not found #004' do
26
+ token_a = 'hears'
27
+ token_b = 'heard'
28
+ cc = ChatCorrect::Pluralization.new(token_a: token_a, token_b: token_b)
29
+ expect(cc.pluralization_error?).to eq(false)
30
+ end
31
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Possessive do
4
+ it 'is a possessive #001' do
5
+ token_a = 'Johnƪs'
6
+ token_b = 'John'
7
+ cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
8
+ expect(cc.possessive?).to eq(true)
9
+ end
10
+
11
+ it 'is a possessive #002' do
12
+ token_a = 'John∮s'
13
+ token_b = 'John'
14
+ cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
15
+ expect(cc.possessive?).to eq(true)
16
+ end
17
+
18
+ it 'is a possessive #003' do
19
+ token_a = 'John'
20
+ token_b = 'Johnƪs'
21
+ cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
22
+ expect(cc.possessive?).to eq(true)
23
+ end
24
+
25
+ it 'is a possessive #004' do
26
+ token_a = 'John'
27
+ token_b = 'John∮s'
28
+ cc = ChatCorrect::Possessive.new(token_a: token_a, token_b: token_b)
29
+ expect(cc.possessive?).to eq(true)
30
+ end
31
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::PunctuationMasqueradingAsSpellingError do
4
+ it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #001' do
5
+ token_a = 'canƪt'
6
+ token_b = 'cant'
7
+ cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
8
+ expect(cc.exists?).to eq(true)
9
+ end
10
+
11
+ it 'identifies words where the punctuation may masquerade as a spelling error in the algorithm #002' do
12
+ token_a = 'cant'
13
+ token_b = 'canƪt'
14
+ cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
15
+ expect(cc.exists?).to eq(true)
16
+ end
17
+
18
+ it 'returns false for regular spelling mistakes' do
19
+ token_a = 'speeling'
20
+ token_b = 'spelling'
21
+ cc = ChatCorrect::PunctuationMasqueradingAsSpellingError.new(token_a: token_a, token_b: token_b)
22
+ expect(cc.exists?).to eq(false)
23
+ end
24
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Punctuation do
4
+ it 'returns true if the text is a punctuation mark #001' do
5
+ text = '?'
6
+ cc = ChatCorrect::Punctuation.new(text: text)
7
+ expect(cc.is_punctuation?).to eq(true)
8
+ end
9
+
10
+ it 'returns true if the text is a punctuation mark #002' do
11
+ text = '∯'
12
+ cc = ChatCorrect::Punctuation.new(text: text)
13
+ expect(cc.is_punctuation?).to eq(true)
14
+ end
15
+
16
+ it 'returns false if the text is not a punctuation mark #003' do
17
+ text = 'hello'
18
+ cc = ChatCorrect::Punctuation.new(text: text)
19
+ expect(cc.is_punctuation?).to eq(false)
20
+ end
21
+ end
@@ -0,0 +1,59 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Spelling do
4
+ it 'returns true if a spelling mistake is found #001' do
5
+ token_a = 'speeling'
6
+ token_b = 'spelling'
7
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
8
+ expect(cc.spelling_error?).to eq(true)
9
+ end
10
+
11
+ it 'returns false if a spelling mistake is not found #002' do
12
+ token_a = 'cold'
13
+ token_b = 'warm'
14
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
15
+ expect(cc.spelling_error?).to eq(false)
16
+ end
17
+
18
+ it 'returns false if a spelling mistake is not found #003' do
19
+ token_a = 'the'
20
+ token_b = 'of'
21
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
22
+ expect(cc.spelling_error?).to eq(false)
23
+ end
24
+
25
+ it 'returns false if a spelling mistake is not found #004' do
26
+ token_a = '??'
27
+ token_b = '???'
28
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
29
+ expect(cc.spelling_error?).to eq(false)
30
+ end
31
+
32
+ it 'returns true if a spelling mistake is found #005' do
33
+ token_a = 'original'
34
+ token_b = 'originnal'
35
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
36
+ expect(cc.spelling_error?).to eq(true)
37
+ end
38
+
39
+ it 'returns false if a spelling mistake is not found #006' do
40
+ token_a = 'a'
41
+ token_b = 'b'
42
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
43
+ expect(cc.spelling_error?).to eq(false)
44
+ end
45
+
46
+ it 'returns false if a spelling mistake is not found #007' do
47
+ token_a = 'that'
48
+ token_b = 'this'
49
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
50
+ expect(cc.spelling_error?).to eq(false)
51
+ end
52
+
53
+ it 'returns false if a spelling mistake is not found #008' do
54
+ token_a = 'is'
55
+ token_b = 'Is'
56
+ cc = ChatCorrect::Spelling.new(token_a: token_a, token_b: token_b)
57
+ expect(cc.spelling_error?).to eq(false)
58
+ end
59
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Time do
4
+ it 'returns true if the text is a time' do
5
+ text = "10:25 AM"
6
+ cc = ChatCorrect::Time.new(text: text)
7
+ expect(cc.is_time?).to eq(true)
8
+ end
9
+
10
+ it 'returns true if the text is a 24-hour time' do
11
+ text = "23:11"
12
+ cc = ChatCorrect::Time.new(text: text)
13
+ expect(cc.is_time?).to eq(true)
14
+ end
15
+
16
+ it 'returns false if the text is not a time' do
17
+ text = "January 1st: It's the new year."
18
+ cc = ChatCorrect::Time.new(text: text)
19
+ expect(cc.is_time?).to eq(false)
20
+ end
21
+ end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Tokenize do
4
+ it 'correctly tokenizes test sentence #001' do
5
+ text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
6
+ cc = ChatCorrect::Tokenize.new(text: text)
7
+ expect(cc.tokenize).to eq(["Hello", "Ms.", "Piggy", ",", "this", "is", "John", ".", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", ".", "That", "is", "a", "20", "%", "discount", "over", "the", "Nev.", "retailers", ".", "It", "is", "a", "∫", "MUST", "BUY", "∮", ",", "so", "donƪt", "hesistate", "."])
8
+ end
9
+
10
+ it 'correctly tokenizes test sentence #002' do
11
+ text = "Hello Ms. Piggy, this is John. We are selling a new fridge for $5,000. That is a 20% discount over the Nev. retailers. It is a 'MUST BUY', so don't hesistate."
12
+ cc = ChatCorrect::Tokenize.new(text: text)
13
+ expect(cc.tokenize_no_punct).to eq(["Hello", "Ms.", "Piggy", "this", "is", "John", "We", "are", "selling", "a", "new", "fridge", "for", "$5☌000", "That", "is", "a", "20", "discount", "over", "the", "Nev.", "retailers", "It", "is", "a", "∫", "MUST", "BUY", "∮", "so", "donƪt", "hesistate"])
14
+ end
15
+
16
+ it 'correctly tokenizes test sentence #003' do
17
+ text = "Lisa Raines, a lawyer and director of government relations for the Industrial Biotechnical Association, contends that a judge well-versed in patent law and the concerns of research-based industries would have ruled otherwise. And Judge Newman, a former patent lawyer, wrote in her dissent when the court denied a motion for a rehearing of the case by the full court, \'The panel's judicial legislation has affected an important high-technological industry, without regard to the consequences for research and innovation or the public interest.\' Says Ms. Raines, \'[The judgement] confirms our concern that the absence of patent lawyers on the court could prove troublesome.\'"
18
+ cc = ChatCorrect::Tokenize.new(text: text)
19
+ expect(cc.tokenize).to eq(["Lisa", "Raines", ",", "a", "lawyer", "and", "director", "of", "government", "relations", "for", "the", "Industrial", "Biotechnical", "Association", ",", "contends", "that", "a", "judge", "well-versed", "in", "patent", "law", "and", "the", "concerns", "of", "research-based", "industries", "would", "have", "ruled", "otherwise", ".", "And", "Judge", "Newman", ",", "a", "former", "patent", "lawyer", ",", "wrote", "in", "her", "dissent", "when", "the", "court", "denied", "a", "motion", "for", "a", "rehearing", "of", "the", "case", "by", "the", "full", "court", ",", "∫", "The", "panelƪs", "judicial", "legislation", "has", "affected", "an", "important", "high-technological", "industry", ",", "without", "regard", "to", "the", "consequences", "for", "research", "and", "innovation", "or", "the", "public", "interest", ".", "∫", "Says", "Ms.", "Raines", ",", "∫", "[", "The", "judgement", "]", "confirms", "our", "concern", "that", "the", "absence", "of", "patent", "lawyers", "on", "the", "court", "could", "prove", "troublesome", ".", "∮"])
20
+ end
21
+
22
+ it 'correctly tokenizes test sentence #004' do
23
+ text = 'Whether there will be eligible to become king to you?'
24
+ cc = ChatCorrect::Tokenize.new(text: text)
25
+ expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "?"])
26
+ end
27
+
28
+ it 'correctly tokenizes test sentence #005' do
29
+ cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you!')
30
+ expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "!"])
31
+ end
32
+
33
+ it 'correctly tokenizes test sentence #006' do
34
+ cc = ChatCorrect::Tokenize.new(text: 'Whether there will be eligible to become king to you.')
35
+ expect(cc.tokenize).to eq(["Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", "."])
36
+ end
37
+
38
+ it 'correctly tokenizes test sentence #007' do
39
+ cc = ChatCorrect::Tokenize.new(text: "\"Whether there will be eligible to become king to you.\"")
40
+ expect(cc.tokenize).to eq(["∬", "Whether", "there", "will", "be", "eligible", "to", "become", "king", "to", "you", ".", "∯"])
41
+ end
42
+
43
+ it 'correctly tokenizes test sentence #008' do
44
+ cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
45
+ expect(cc.tokenize_no_punct).to eq(['His', 'name', 'is', 'Mr.', 'Smith'])
46
+ end
47
+
48
+ it 'correctly tokenizes test sentence #009' do
49
+ cc = ChatCorrect::Tokenize.new(text: 'His name is Mr. Smith.')
50
+ expect(cc.tokenize).to eq(['His', 'name', 'is', 'Mr.', 'Smith', '.'])
51
+ end
52
+
53
+ it 'correctly tokenizes test sentence #010' do
54
+ cc = ChatCorrect::Tokenize.new(text: 'His name is Col. Smith.')
55
+ expect(cc.tokenize).to eq(['His', 'name', 'is', 'Col.', 'Smith', '.'])
56
+ end
57
+
58
+ it 'correctly tokenizes test sentence #011' do
59
+ cc = ChatCorrect::Tokenize.new(text: 'She went to East Univ. to get her degree.')
60
+ expect(cc.tokenize).to eq(['She', 'went', 'to', 'East', 'Univ.', 'to', 'get', 'her', 'degree', '.'])
61
+ end
62
+
63
+ it 'correctly tokenizes test sentence #012' do
64
+ cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc. on weekends.')
65
+ expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc.', 'on', 'weekends', '.'])
66
+ end
67
+
68
+ it 'correctly tokenizes test sentence #013' do
69
+ cc = ChatCorrect::Tokenize.new(text: 'He went to school in Mass. back in the day.')
70
+ expect(cc.tokenize).to eq(['He', 'went', 'to', 'school', 'in', 'Mass.', 'back', 'in', 'the', 'day', '.'])
71
+ end
72
+
73
+ it 'correctly tokenizes test sentence #014' do
74
+ cc = ChatCorrect::Tokenize.new(text: 'It is cold in Jan. they say.')
75
+ expect(cc.tokenize).to eq(['It', 'is', 'cold', 'in', 'Jan.', 'they', 'say', '.'])
76
+ end
77
+
78
+ it 'correctly tokenizes test sentence #015' do
79
+ cc = ChatCorrect::Tokenize.new(text: '1, 2, 3, etc. is the beat.')
80
+ expect(cc.tokenize).to eq(['1', ',', '2', ',', '3', ',', 'etc.', 'is', 'the', 'beat', '.'])
81
+ end
82
+
83
+ it 'correctly tokenizes test sentence #016' do
84
+ cc = ChatCorrect::Tokenize.new(text: 'Alfred E. Stone is a person.')
85
+ expect(cc.tokenize).to eq(['Alfred', 'E.', 'Stone', 'is', 'a', 'person', '.'])
86
+ end
87
+
88
+ it 'correctly tokenizes test sentence #017' do
89
+ cc = ChatCorrect::Tokenize.new(text: 'The U.S.A. is a country.')
90
+ expect(cc.tokenize).to eq(['The', 'U.S.A.', 'is', 'a', 'country', '.'])
91
+ end
92
+
93
+ it 'correctly tokenizes test sentence #018' do
94
+ cc = ChatCorrect::Tokenize.new(text: 'He works at ABC Inc.')
95
+ expect(cc.tokenize).to eq(['He', 'works', 'at', 'ABC', 'Inc', '.'])
96
+ end
97
+
98
+ it 'correctly tokenizes test sentence #019' do
99
+ cc = ChatCorrect::Tokenize.new(text: 'His name is Kevin')
100
+ expect(cc.tokenize).to eq(%w(His name is Kevin))
101
+ end
102
+
103
+ it 'correctly tokenizes test sentence #020' do
104
+ cc = ChatCorrect::Tokenize.new(text: 'He paid $10,000,000 for the new house which is equivalent to ¥1,000,000,000.')
105
+ expect(cc.tokenize).to eq(["He", "paid", "$10☌000☌000", "for", "the", "new", "house", "which", "is", "equivalent", "to", "¥1☌000☌000☌000", "."])
106
+ end
107
+
108
+ it 'correctly tokenizes test sentence #021' do
109
+ cc = ChatCorrect::Tokenize.new(text: 'Exclamation point requires both marks (Q.E.D.!).')
110
+ expect(cc.tokenize).to eq(['Exclamation', 'point', 'requires', 'both', 'marks', '(', 'Q.E.D.', '!', ')', '.'])
111
+ end
112
+
113
+ it 'correctly tokenizes test sentence #022' do
114
+ cc = ChatCorrect::Tokenize.new(text: 'An abbreviation that ends with a period must not be left hanging without it (in parentheses, e.g.), and a sentence containing a parenthesis must itself have terminal punctuation (are we almost done?).')
115
+ expect(cc.tokenize).to eq(['An', 'abbreviation', 'that', 'ends', 'with', 'a', 'period', 'must', 'not', 'be', 'left', 'hanging', 'without', 'it', '(', 'in', 'parentheses', ',', 'e.g.', ')', ',', 'and', 'a', 'sentence', 'containing', 'a', 'parenthesis', 'must', 'itself', 'have', 'terminal', 'punctuation', '(', 'are', 'we', 'almost', 'done', '?', ')', '.'])
116
+ end
117
+
118
+ it 'correctly tokenizes test sentence #023' do
119
+ cc = ChatCorrect::Tokenize.new(text: 'his name is mr. smith, king of the \'entire\' forest.')
120
+ expect(cc.tokenize).to eq(["his", "name", "is", "mr.", "smith", ",", "king", "of", "the", "∫", "entire", "∮", "forest", "."])
121
+ end
122
+
123
+ it 'correctly tokenizes test sentence #024' do
124
+ cc = ChatCorrect::Tokenize.new(text: 'Check out http://www.google.com/?this_is_a_url/hello-world.html for more info.')
125
+ expect(cc.tokenize).to eq(['Check', 'out', 'http://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
126
+ end
127
+
128
+ it 'correctly tokenizes test sentence #025' do
129
+ cc = ChatCorrect::Tokenize.new(text: 'Check out https://www.google.com/?this_is_a_url/hello-world.html for more info.')
130
+ expect(cc.tokenize).to eq(['Check', 'out', 'https://www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
131
+ end
132
+
133
+ it 'correctly tokenizes test sentence #026' do
134
+ cc = ChatCorrect::Tokenize.new(text: 'Check out www.google.com/?this_is_a_url/hello-world.html for more info.')
135
+ expect(cc.tokenize).to eq(['Check', 'out', 'www.google.com/?this_is_a_url/hello-world.html', 'for', 'more', 'info', '.'])
136
+ end
137
+
138
+ it 'correctly tokenizes test sentence #027' do
139
+ cc = ChatCorrect::Tokenize.new(text: 'Please email example@example.com for more info.')
140
+ expect(cc.tokenize).to eq(['Please', 'email', 'example@example.com', 'for', 'more', 'info', '.'])
141
+ end
142
+ end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ChatCorrect::Verb do
4
+
5
+ it 'returns true if a verb error is found #001' do
6
+ word = 'was'
7
+ text = 'He is awesome.'
8
+ pos = 'v'
9
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
10
+ expect(cc.verb_error?).to eq(true)
11
+ end
12
+
13
+ it 'returns true if a verb error is found #002' do
14
+ word = 'buy'
15
+ text = 'He bought some shoes.'
16
+ pos = 'v'
17
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
18
+ expect(cc.verb_error?).to eq(true)
19
+ end
20
+
21
+ it 'returns false if it is not a verb #003' do
22
+ word = 'buy'
23
+ text = 'He bought some shoes.'
24
+ pos = 'n'
25
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
26
+ expect(cc.verb_error?).to eq(false)
27
+ end
28
+
29
+ it 'returns false if it is not a verb error #004' do
30
+ word = 'threw'
31
+ text = 'He bought some shoes.'
32
+ pos = 'v'
33
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
34
+ expect(cc.verb_error?).to eq(false)
35
+ end
36
+
37
+ it 'returns true if a verb error is found #005' do
38
+ word = 'eat'
39
+ text = 'I ate dinner.'
40
+ pos = 'v'
41
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
42
+ expect(cc.verb_error?).to eq(true)
43
+ end
44
+
45
+ it 'returns true if a verb error is found #006' do
46
+ word = 'win'
47
+ text = 'I won a medal.'
48
+ pos = 'v'
49
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
50
+ expect(cc.verb_error?).to eq(true)
51
+ end
52
+
53
+ it 'returns true if a verb error is found #007' do
54
+ word = 'file'
55
+ text = 'I filed the papers.'
56
+ pos = 'v'
57
+ cc = ChatCorrect::Verb.new(word: word, pos: pos, text: text)
58
+ expect(cc.verb_error?).to eq(true)
59
+ end
60
+ end
@@ -0,0 +1 @@
1
+ require 'chat_correct'