chat_correct 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +1 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +208 -0
- data/Rakefile +4 -0
- data/chat_correct.gemspec +28 -0
- data/lib/chat_correct/capitalization.rb +13 -0
- data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
- data/lib/chat_correct/common_verb_mistake.rb +62 -0
- data/lib/chat_correct/contraction.rb +103 -0
- data/lib/chat_correct/correct.rb +352 -0
- data/lib/chat_correct/corrections_hash.rb +204 -0
- data/lib/chat_correct/mistake_analyzer.rb +40 -0
- data/lib/chat_correct/pluralization.rb +22 -0
- data/lib/chat_correct/possessive.rb +25 -0
- data/lib/chat_correct/punctuation.rb +17 -0
- data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
- data/lib/chat_correct/spelling.rb +20 -0
- data/lib/chat_correct/time.rb +14 -0
- data/lib/chat_correct/tokenize.rb +164 -0
- data/lib/chat_correct/verb.rb +65 -0
- data/lib/chat_correct/version.rb +3 -0
- data/lib/chat_correct.rb +16 -0
- data/spec/chat_correct/capitalization_spec.rb +17 -0
- data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
- data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
- data/spec/chat_correct/contraction_spec.rb +259 -0
- data/spec/chat_correct/correct_spec.rb +1650 -0
- data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
- data/spec/chat_correct/pluralization_spec.rb +31 -0
- data/spec/chat_correct/possessive_spec.rb +31 -0
- data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
- data/spec/chat_correct/punctuation_spec.rb +21 -0
- data/spec/chat_correct/spelling_spec.rb +59 -0
- data/spec/chat_correct/time_spec.rb +21 -0
- data/spec/chat_correct/tokenize_spec.rb +142 -0
- data/spec/chat_correct/verb_spec.rb +60 -0
- data/spec/spec_helper.rb +1 -0
- metadata +201 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 83347780fd13ba3682aa33265cca80906f4e2005
|
4
|
+
data.tar.gz: 170d4c280a553ae9c554ba8396594664be6ebf31
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 001eba96eb826a38d918a83eff12673f5ba80e81f30ee9ddefe038ad1c187c84fa266508941633ea40d7c7f28b91f961a835bbd6c89f6c679725d4b2cec4090f
|
7
|
+
data.tar.gz: c33e23f38d08530f2472369d8af0edfd3c254aa7532665564785953b73a76da2d9c43ca4e07c293bbed79dcc247b56eddd353e01deddd16d71f3019eb1217fcb
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Kevin S. Dias
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
# Chat Correct
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/chat_correct.svg)](http://badge.fury.io/rb/chat_correct) [![Build Status](https://travis-ci.org/diasks2/chat_correct.png)](https://travis-ci.org/diasks2/chat_correct) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/diasks2/chat_correct/blob/master/LICENSE.txt)
|
4
|
+
|
5
|
+
A Ruby gem to help students improve their English. A teacher can correct a student's sentence and this gem will automatically provide information on the type of error (e.g. punctuation, spelling, etc.), the placement of the errors, and the number of errors.
|
6
|
+
|
7
|
+
Live Demo: [Chat Correct chat room application](http://www.chat-correct.com)
|
8
|
+
|
9
|
+
![](https://s3.amazonaws.com/tm-town-nlp-resources/chat_correct_screenshot.jpg)
|
10
|
+
|
11
|
+
## Install
|
12
|
+
|
13
|
+
**Ruby**
|
14
|
+
*Supports Ruby 2.1.5 and above*
|
15
|
+
```
|
16
|
+
gem install chat_correct
|
17
|
+
```
|
18
|
+
|
19
|
+
**Ruby on Rails**
|
20
|
+
Add this line to your application’s Gemfile:
|
21
|
+
```ruby
|
22
|
+
gem 'chat_correct'
|
23
|
+
```
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
#### Correct
|
28
|
+
|
29
|
+
The correct method returns a hash of the original sentence interleaved with the corrected sentence. The idea is that you can use styling in your output program to highlight the errors (color, **font weight**, ~~strikethrough~~, etc.).
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
os = "is the, puncttuation are wrong."
|
33
|
+
cs = "Is the punctuation wrong?"
|
34
|
+
cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
|
35
|
+
cc.correct
|
36
|
+
|
37
|
+
# => {
|
38
|
+
# 0 => {
|
39
|
+
# 'token' => 'is',
|
40
|
+
# 'type' => 'capitalization_mistake'
|
41
|
+
# },
|
42
|
+
# 1 => {
|
43
|
+
# 'token' => 'Is',
|
44
|
+
# 'type' => 'capitalization_correction'
|
45
|
+
# },
|
46
|
+
# 2 => {
|
47
|
+
# 'token' => 'the',
|
48
|
+
# 'type' => 'no_mistake'
|
49
|
+
# },
|
50
|
+
# 3 => {
|
51
|
+
# 'token' => ',',
|
52
|
+
# 'type' => 'punctuation_mistake'
|
53
|
+
# },
|
54
|
+
# 4 => {
|
55
|
+
# 'token' => 'puncttuation',
|
56
|
+
# 'type' => 'spelling_mistake'
|
57
|
+
# },
|
58
|
+
# 5 => {
|
59
|
+
# 'token' => 'punctuation',
|
60
|
+
# 'type' => 'spelling_correction'
|
61
|
+
# },
|
62
|
+
# 6 => {
|
63
|
+
# 'token' => 'are',
|
64
|
+
# 'type' => 'unnecessary_word_mistake'
|
65
|
+
# },
|
66
|
+
# 7 => {
|
67
|
+
# 'token' => 'wrong',
|
68
|
+
# 'type' => 'no_mistake'
|
69
|
+
# },
|
70
|
+
# 8 => {
|
71
|
+
# 'token' => '.',
|
72
|
+
# 'type' => 'punctuation_mistake'
|
73
|
+
# },
|
74
|
+
# 9 => {
|
75
|
+
# 'token' => '?',
|
76
|
+
# 'type' => 'punctuation_correction'
|
77
|
+
# }
|
78
|
+
# }
|
79
|
+
|
80
|
+
cc.correct[5]['token']
|
81
|
+
# => 'punctuation'
|
82
|
+
|
83
|
+
cc.correct[5]['type']
|
84
|
+
# => 'spelling_correction'
|
85
|
+
|
86
|
+
```
|
87
|
+
|
88
|
+
#### Mistakes
|
89
|
+
|
90
|
+
The mistakes method returns a hash of each mistake, ordered by its position in the sentence. For each mistake the method returns the `position`, `error_type`, `mistake`, and `correction`.
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
os = "is the, puncttuation are wrong."
|
94
|
+
cs = "Is the punctuation wrong?"
|
95
|
+
cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
|
96
|
+
cc.mistakes
|
97
|
+
|
98
|
+
# => {
|
99
|
+
# 0 => {
|
100
|
+
# 'position' => 0,
|
101
|
+
# 'error_type' => 'capitalization',
|
102
|
+
# 'mistake' => 'is',
|
103
|
+
# 'correction' => 'Is'
|
104
|
+
# },
|
105
|
+
# 1 => {
|
106
|
+
# 'position' => 3,
|
107
|
+
# 'error_type' => 'punctuation',
|
108
|
+
# 'mistake' => ',',
|
109
|
+
# 'correction' => ''
|
110
|
+
# },
|
111
|
+
# 2 => {
|
112
|
+
# 'position' => 4,
|
113
|
+
# 'error_type' => 'spelling',
|
114
|
+
# 'mistake' => 'puncttuation',
|
115
|
+
# 'correction' => 'punctuation'
|
116
|
+
# },
|
117
|
+
# 3 => {
|
118
|
+
# 'position' => 6,
|
119
|
+
# 'error_type' => 'unnecessary_word',
|
120
|
+
# 'mistake' => 'are',
|
121
|
+
# 'correction' => ''
|
122
|
+
# },
|
123
|
+
# 4 => {
|
124
|
+
# 'position' => 8,
|
125
|
+
# 'error_type' => 'punctuation',
|
126
|
+
# 'mistake' => '.',
|
127
|
+
# 'correction' => '?'
|
128
|
+
# }
|
129
|
+
# }
|
130
|
+
|
131
|
+
cc.mistakes[4]['correction']
|
132
|
+
# => '?'
|
133
|
+
|
134
|
+
cc.mistakes[1]['mistake']
|
135
|
+
# => ','
|
136
|
+
```
|
137
|
+
|
138
|
+
#### Mistake Report
|
139
|
+
|
140
|
+
The mistake report method returns a hash containing the number of mistakes for each error type.
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
os = "is the, puncttuation are wrong."
|
144
|
+
cs = "Is the punctuation wrong?"
|
145
|
+
cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
|
146
|
+
cc.mistake_report
|
147
|
+
# => {
|
148
|
+
# 'missing_word' => 0,
|
149
|
+
# 'unnecessary_word' => 1,
|
150
|
+
# 'spelling' => 1,
|
151
|
+
# 'verb' => 0,
|
152
|
+
# 'punctuation' => 2,
|
153
|
+
# 'word_order' => 0,
|
154
|
+
# 'capitalization' => 1,
|
155
|
+
# 'duplicate_word' => 0,
|
156
|
+
# 'word_choice' => 0,
|
157
|
+
# 'pluralization' => 0,
|
158
|
+
# 'possessive' => 0,
|
159
|
+
# 'stylistic_choice' => 0
|
160
|
+
# }
|
161
|
+
|
162
|
+
cc.mistake_report['punctuation']
|
163
|
+
# => 2
|
164
|
+
```
|
165
|
+
|
166
|
+
#### Number of Mistakes
|
167
|
+
|
168
|
+
The number of mistakes method returns the total number of mistakes in the original sentence.
|
169
|
+
|
170
|
+
```ruby
|
171
|
+
os = "is the, puncttuation are wrong."
|
172
|
+
cs = "Is the punctuation wrong?"
|
173
|
+
cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
|
174
|
+
cc.number_of_mistakes
|
175
|
+
# => 5
|
176
|
+
```
|
177
|
+
|
178
|
+
## Contributing
|
179
|
+
|
180
|
+
1. Fork it ( https://github.com/diasks2/chat_correct/fork )
|
181
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
182
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
183
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
184
|
+
5. Create a new Pull Request
|
185
|
+
|
186
|
+
## License
|
187
|
+
|
188
|
+
The MIT License (MIT)
|
189
|
+
|
190
|
+
Copyright (c) 2015 Kevin S. Dias
|
191
|
+
|
192
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
193
|
+
of this software and associated documentation files (the "Software"), to deal
|
194
|
+
in the Software without restriction, including without limitation the rights
|
195
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
196
|
+
copies of the Software, and to permit persons to whom the Software is
|
197
|
+
furnished to do so, subject to the following conditions:
|
198
|
+
|
199
|
+
The above copyright notice and this permission notice shall be included in
|
200
|
+
all copies or substantial portions of the Software.
|
201
|
+
|
202
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
203
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
204
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
205
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
206
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
207
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
208
|
+
THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for chat_correct. The version number is read from
# lib/chat_correct/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'chat_correct/version'

Gem::Specification.new do |spec|
  spec.name          = "chat_correct"
  spec.version       = ChatCorrect::VERSION
  spec.authors       = ["Kevin S. Dias"]
  spec.email         = ["diasks2@gmail.com"]
  spec.summary       = %q{Returns the errors and error types when an incorrect English sentence is diffed with a correct English sentence.}
  spec.description   = %q{A Ruby gem to help students improve their English. A teacher can correct a student's sentence and this gem will automatically provide information on the type of error (i.e. punctuation, spelling, etc.), the placement of the errors, and the number of errors.}
  # Previously an empty string; points at the repository named in the README.
  spec.homepage      = "https://github.com/diasks2/chat_correct"
  spec.license       = "MIT"

  # Package every file tracked by git (NUL-separated to survive odd file names).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_runtime_dependency "levenshtein-ffi"
  spec.add_runtime_dependency "linguistics", "~> 2.0.2"
  spec.add_runtime_dependency "verbs"
  spec.add_runtime_dependency "engtagger"
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module ChatCorrect
  # Compares two tokens and reports whether they differ only by letter case.
  class Capitalization
    attr_reader :token_a, :token_b

    # @param token_a [String, nil] first token of the pair
    # @param token_b [String, nil] second token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when the tokens are equal after downcasing but
    #   not equal as given; false otherwise, including when either token is
    #   nil (a missing token used to raise NoMethodError).
    def capitalization_error?
      return false if token_a.nil? || token_b.nil?
      token_a != token_b && token_a.downcase.eql?(token_b.downcase)
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'engtagger'
|
2
|
+
|
3
|
+
module ChatCorrect
  # Joins auxiliary words with the word(s) that follow them so that a
  # multi-word verb phrase (for example "will have" + a verb) becomes a
  # single space-separated token.
  class CombineMultiWordVerbs
    # Leading words that may absorb the next token when that token is tagged
    # as a verb or an adverb.
    TOKEN_ARRAY = ['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'could', 'did', 'arenƪt', 'wasnƪt', 'werenƪt', 'havenƪt', 'hasnƪt', 'hadnƪt', 'wouldnƪt', 'couldnƪt', 'didnƪt'].freeze
    # Leading words that may absorb a following "not" plus a verb.
    TOKEN_ARRAY_2 = ['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'did', 'could'].freeze
    # Two-word openings that absorb a third token when it is tagged as a verb.
    THREE_WORD_OPENINGS = [['will', 'have'], ['would', 'have'], ['had', 'been']].freeze

    attr_reader :text

    # @param text [String] the sentence to tokenize and combine
    def initialize(text:)
      @text = text
    end

    # Tokenizes +text+ and merges multi-word verb phrases into single tokens.
    #
    # Merged-away tokens are tracked by index. The previous implementation
    # removed them by value, which also dropped every other identical token
    # in the sentence.
    #
    # @return [Array<String>] the tokens with verb phrases combined
    def combine
      tokens = ChatCorrect::Tokenize.new(text: text).tokenize
      # NOTE(review): assumes the whitespace-split tagger output lines up
      # index-for-index with the tokenizer output, as the original code did.
      tagged = EngTagger.new.add_tags(text).to_s.split
      absorbed = []
      tokens.each_with_index do |_token, index|
        # A token already folded into an earlier phrase cannot start another.
        next if absorbed.include?(index)
        extra = tokens_to_absorb(tokens, tagged, index)
        next if extra.zero?
        tokens[index] = tokens[index, extra + 1].join(' ')
        absorbed.concat((index + 1..index + extra).to_a)
      end
      tokens.reject.with_index { |_token, index| absorbed.include?(index) }
    end

    private

    # Number of following tokens (0, 1 or 2) the token at +index+ should absorb.
    def tokens_to_absorb(tokens, tagged, index)
      token = tokens[index]
      following = tokens[index + 1]
      return 0 if following.nil?
      third_is_verb = !tokens[index + 2].nil? && verb_tag?(tagged[index + 2])
      if third_is_verb && THREE_WORD_OPENINGS.include?([token, following])
        2
      elsif third_is_verb && following.to_s.eql?('not') && TOKEN_ARRAY_2.include?(token)
        2
      elsif two_word_phrase?(tokens, tagged, index)
        1
      else
        0
      end
    end

    # True when the token at +index+ is an auxiliary followed by a verb or
    # adverb, the previous token is not itself a combined phrase, and the
    # next token is not "had".
    def two_word_phrase?(tokens, tagged, index)
      TOKEN_ARRAY.include?(tokens[index]) &&
        (verb_tag?(tagged[index + 1]) || adverb_tag?(tagged[index + 1])) &&
        # index 0 has no previous token; tokens[-1] would wrap to the last one.
        (index.zero? || !tokens[index - 1].include?(' ')) &&
        tokens[index + 1] != 'had'
    end

    # Checks the character right after the opening bracket of a tagged word.
    # Presumably tags look like "<vb>word</vb>" — confirm against EngTagger.
    def verb_tag?(tagged_word)
      tagged_word.to_s[1].to_s.eql?('v')
    end

    # Checks the two characters right after the opening bracket for "rb".
    def adverb_tag?(tagged_word)
      tagged_word.to_s[1..2].to_s.eql?('rb')
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module ChatCorrect
  # Looks up a pair of tokens in a table of common incorrect verb forms and
  # the forms they are mapped to.
  class CommonVerbMistake
    # Incorrect form => form it is mapped to.
    COMMON_VERB_MISTAKES =
      { "flied" => "flew",
        "weared" => "wore",
        "finded" => "found",
        "fighted" => "fought",
        "clinged" => "clung",
        "bleeded" => "bled",
        "bringed" => "brought",
        "catched" => "caught",
        "cutted" => "cut",
        "feeled" => "felt",
        "drived" => "drove",
        "falled" => "fell",
        "forgetted" => "forgot",
        "freezed" => "froze",
        "gived" => "gave",
        "heared" => "heard",
        "hurted" => "hurt",
        "keeped" => "kept",
        "knowed" => "knew",
        "leaved" => "left",
        "losed" => "lost",
        "meaned" => "meant",
        "quited" => "quit",
        "quitted" => "quit",
        "ridded" => "rode",
        "runned" => "ran",
        "rised" => "rose",
        "seed" => "saw",
        "singed" => "sang",
        "sitted" => "sat",
        "sited" => "sat",
        "speaked" => "spoke",
        "standed" => "stood",
        "sweared" => "swore",
        "swimmed" => "swam",
        "thinked" => "thought",
        "telled" => "told",
        "taked" => "took",
        "stringed" => "strung",
        "teached" => "taught",
        "waked" => "woke",
        "weeped" => "wept",
        "winned" => "won",
        "writed" => "wrote",
        "weaved" => "wove",
        "gots" => "have"
      }.freeze

    attr_reader :token_a, :token_b

    # @param token_a [String, nil] one token of the pair
    # @param token_b [String, nil] the other token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when one token is a listed mistake and the other
    #   is the form it maps to, in either order.
    def exists?
      listed_pair?(token_a, token_b) || listed_pair?(token_b, token_a)
    end

    private

    # True only when +mistake+ is a key of the table and maps to +correction+.
    # The key check matters: a missing key yields nil, which used to compare
    # equal to a nil +correction+ and produce a false positive.
    def listed_pair?(mistake, correction)
      COMMON_VERB_MISTAKES.key?(mistake) &&
        COMMON_VERB_MISTAKES[mistake].eql?(correction)
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module ChatCorrect
  # Decides whether +contraction+ is a contracted form of the token pair
  # +token_a+ / +token_b+ (for example "you" + "are" => "you're").
  class Contraction
    # First word => its irregular "<word> not" contraction.
    NOT_CONTRACTION = {
      'am' => "ain't",
      'do' => "don't",
      'will' => "won't",
      'shall' => "shan't",
      'is' => "isn't"
    }.freeze
    # [token_a, token_b] => contraction, for forms no suffix rule covers.
    IRREGULAR_CONTRACTION = {
      ['is', 'not'] => "ain't",
      ['madam', nil] => "ma'am",
      ['never-do-well', nil] => "ne'er-do-well",
      ['cat-of-nine-tails', nil] => "cat-o'-nine-tails",
      ['jack-of-the-lantern', nil] => "jack-o'-lantern",
      ['will-of-the-wisp', nil] => "will-o'-the-wisp"
    }.freeze

    attr_reader :token_a, :token_b, :contraction

    # All three values are stored downcased, and every "ƪ" in the contraction
    # is replaced by an apostrophe. When +token_a+ or +contraction+ is nil
    # nothing is stored and #contraction? answers false.
    #
    # @param token_a [String, nil] first expanded word
    # @param token_b [String, nil] second expanded word (nil for one-word forms)
    # @param contraction [String, nil] the candidate contraction
    def initialize(token_a:, token_b:, contraction:)
      return if token_a.nil? || contraction.nil?
      @token_a = token_a.downcase
      @token_b = token_b && token_b.downcase
      @contraction = contraction.downcase.gsub(/ƪ/, "'")
    end

    # @return [Boolean] true when the contraction matches any known pattern
    def contraction?
      return false if token_a.nil? || contraction.nil?
      is_a_not_contraction? ||
        is_an_irregular_contraction? ||
        is_an_us_contraction? ||
        is_an_am_contraction? ||
        is_an_are_contraction? ||
        is_an_is_does_has_contraction? ||
        is_a_have_contraction? ||
        is_a_had_did_would_contraction? ||
        is_a_will_contraction? ||
        is_an_of_contraction? ||
        is_an_it_contraction? ||
        is_a_them_contraction?
    end

    private

    # True when the contraction is exactly token_a followed by +suffix+.
    # The separator must actually be present: String#partition returns the
    # whole string as the head when it is not found, which used to make a
    # "contraction" identical to token_a pass every suffix check.
    def token_a_with_suffix?(suffix)
      head, found, tail = contraction.partition(suffix)
      !found.empty? && head.eql?(token_a) && tail.empty?
    end

    def is_a_not_contraction?
      token_b.eql?('not') &&
        (NOT_CONTRACTION[token_a].eql?(contraction) || token_a_with_suffix?("n't"))
    end

    def is_an_irregular_contraction?
      IRREGULAR_CONTRACTION[[token_a, token_b]].eql?(contraction)
    end

    # Only "let us" => "let's".
    def is_an_us_contraction?
      token_b.eql?('us') && token_a.eql?('let') && token_a_with_suffix?("'s")
    end

    # Only "i am" => "i'm".
    def is_an_am_contraction?
      token_b.eql?('am') && token_a.eql?('i') && token_a_with_suffix?("'m")
    end

    def is_an_are_contraction?
      token_b.eql?('are') && token_a_with_suffix?("'re")
    end

    def is_an_is_does_has_contraction?
      (token_b.eql?('is') || token_b.eql?('does') || token_b.eql?('has')) &&
        token_a_with_suffix?("'s")
    end

    def is_a_have_contraction?
      token_b.eql?('have') && token_a_with_suffix?("'ve")
    end

    def is_a_had_did_would_contraction?
      (token_b.eql?('had') || token_b.eql?('did') || token_b.eql?('would')) &&
        token_a_with_suffix?("'d")
    end

    def is_a_will_contraction?
      token_b.eql?('will') && token_a_with_suffix?("'ll")
    end

    # "of" => "o'" on its own, or "o' <token_b>".
    def is_an_of_contraction?
      return false unless token_a.eql?('of')
      return true if contraction.eql?("o'")
      _head, found, tail = contraction.partition("o' ")
      !found.empty? && tail.eql?(token_b)
    end

    # "it <token_b>" => "...'t<token_b>" (e.g. "'twas").
    def is_an_it_contraction?
      return false unless token_a.eql?('it')
      _head, found, tail = contraction.partition("'t")
      !found.empty? && tail.eql?(token_b)
    end

    # "<token_a> them" => "<token_a> 'em".
    def is_a_them_contraction?
      return false unless token_b.eql?('them')
      head, found, _tail = contraction.partition(" 'em")
      !found.empty? && head.eql?(token_a)
    end
  end
end
|