RubyGems - chat_correct - Versions diffs - 0.0.1 - Mend

chat_correct 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +7 -0
data/.gitignore +14 -0
data/.rspec +1 -0
data/.travis.yml +4 -0
data/Gemfile +4 -0
data/LICENSE.txt +22 -0
data/README.md +208 -0
data/Rakefile +4 -0
data/chat_correct.gemspec +28 -0
data/lib/chat_correct/capitalization.rb +13 -0
data/lib/chat_correct/combine_multi_word_verbs.rb +51 -0
data/lib/chat_correct/common_verb_mistake.rb +62 -0
data/lib/chat_correct/contraction.rb +103 -0
data/lib/chat_correct/correct.rb +352 -0
data/lib/chat_correct/corrections_hash.rb +204 -0
data/lib/chat_correct/mistake_analyzer.rb +40 -0
data/lib/chat_correct/pluralization.rb +22 -0
data/lib/chat_correct/possessive.rb +25 -0
data/lib/chat_correct/punctuation.rb +17 -0
data/lib/chat_correct/punctuation_masquerading_as_spelling_error.rb +14 -0
data/lib/chat_correct/spelling.rb +20 -0
data/lib/chat_correct/time.rb +14 -0
data/lib/chat_correct/tokenize.rb +164 -0
data/lib/chat_correct/verb.rb +65 -0
data/lib/chat_correct/version.rb +3 -0
data/lib/chat_correct.rb +16 -0
data/spec/chat_correct/capitalization_spec.rb +17 -0
data/spec/chat_correct/combine_multi_word_verbs_spec.rb +39 -0
data/spec/chat_correct/common_verb_mistake_spec.rb +24 -0
data/spec/chat_correct/contraction_spec.rb +259 -0
data/spec/chat_correct/correct_spec.rb +1650 -0
data/spec/chat_correct/mistake_analyzer_spec.rb +99 -0
data/spec/chat_correct/pluralization_spec.rb +31 -0
data/spec/chat_correct/possessive_spec.rb +31 -0
data/spec/chat_correct/punctuation_masquerading_as_spelling_error_spec.rb +24 -0
data/spec/chat_correct/punctuation_spec.rb +21 -0
data/spec/chat_correct/spelling_spec.rb +59 -0
data/spec/chat_correct/time_spec.rb +21 -0
data/spec/chat_correct/tokenize_spec.rb +142 -0
data/spec/chat_correct/verb_spec.rb +60 -0
data/spec/spec_helper.rb +1 -0
metadata +201 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 83347780fd13ba3682aa33265cca80906f4e2005
+  data.tar.gz: 170d4c280a553ae9c554ba8396594664be6ebf31
+SHA512:
+  metadata.gz: 001eba96eb826a38d918a83eff12673f5ba80e81f30ee9ddefe038ad1c187c84fa266508941633ea40d7c7f28b91f961a835bbd6c89f6c679725d4b2cec4090f
+  data.tar.gz: c33e23f38d08530f2472369d8af0edfd3c254aa7532665564785953b73a76da2d9c43ca4e07c293bbed79dcc247b56eddd353e01deddd16d71f3019eb1217fcb

data/.gitignore ADDED Viewed

@@ -0,0 +1,14 @@
+/.bundle/
+/.yardoc
+/Gemfile.lock
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+*.bundle
+*.so
+*.o
+*.a
+mkmf.log

data/.rspec ADDED Viewed

@@ -0,0 +1 @@
+--color

data/.travis.yml ADDED Viewed

@@ -0,0 +1,4 @@
+language: ruby
+rvm:
+  - "2.1.5"
+  - "2.2.0"

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in chat_correct.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2015 Kevin S. Dias
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,208 @@
+# Chat Correct
+[![Gem Version](https://badge.fury.io/rb/chat_correct.svg)](http://badge.fury.io/rb/chat_correct) [![Build Status](https://travis-ci.org/diasks2/chat_correct.png)](https://travis-ci.org/diasks2/chat_correct) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/diasks2/chat_correct/blob/master/LICENSE.txt)
+A Ruby gem to help students improve their English. A teacher can correct a student's sentence and this gem will automatically provide information on the type of error (e.g. punctuation, spelling, etc.), the placement of the errors, and the number of errors.
+Live Demo: [Chat Correct chat room application](http://www.chat-correct.com)
+![](https://s3.amazonaws.com/tm-town-nlp-resources/chat_correct_screenshot.jpg)
+## Install
+**Ruby**
+*Supports Ruby 2.1.5 and above*
+```
+gem install chat_correct
+```
+**Ruby on Rails**
+Add this line to your application’s Gemfile:
+```ruby
+gem 'chat_correct'
+```
+## Usage
+#### Correct
+The correct method returns a hash of the original sentence interleaved with the corrected sentence. The idea is that you can use styling in your output program to highlight the errors (color, **font weight**, ~~strikethrough~~, etc.).
+```ruby
+os = "is the, puncttuation are wrong."
+cs = "Is the punctuation wrong?"
+cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
+cc.correct
+# =>  {
+#       0 => {
+#        'token' => 'is',
+#        'type' => 'capitalization_mistake'
+#       },
+#       1 => {
+#        'token' => 'Is',
+#        'type' => 'capitalization_correction'
+#       },
+#       2 => {
+#        'token' => 'the',
+#        'type' => 'no_mistake'
+#       },
+#       3 => {
+#        'token' => ',',
+#        'type' => 'punctuation_mistake'
+#       },
+#       4 => {
+#        'token' => 'puncttuation',
+#        'type' => 'spelling_mistake'
+#       },
+#       5 => {
+#        'token' => 'punctuation',
+#        'type' => 'spelling_correction'
+#       },
+#       6 => {
+#        'token' => 'are',
+#        'type' => 'unnecessary_word_mistake'
+#       },
+#       7 => {
+#        'token' => 'wrong',
+#        'type' => 'no_mistake'
+#       },
+#       8 => {
+#        'token' => '.',
+#        'type' => 'punctuation_mistake'
+#       },
+#       9 => {
+#        'token' => '?',
+#        'type' => 'punctuation_correction'
+#       }
+#     }
+cc.correct[5]['token']
+# => 'punctuation'
+cc.correct[5]['type']
+# => 'spelling_correction'
+```
+#### Mistakes
+The mistakes method returns a hash of each mistake, ordered by its position in the sentence. For each mistake the method returns the `position`, `error_type`, `mistake`, and `correction`.
+```ruby
+os = "is the, puncttuation are wrong."
+cs = "Is the punctuation wrong?"
+cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
+cc.mistakes
+# =>  {
+#       0 => {
+#        'position' => 0,
+#        'error_type' => 'capitalization',
+#        'mistake' => 'is',
+#        'correction' => 'Is'
+#       },
+#       1 => {
+#        'position' => 3,
+#        'error_type' => 'punctuation',
+#        'mistake' => ',',
+#        'correction' => ''
+#       },
+#       2 => {
+#        'position' => 4,
+#        'error_type' => 'spelling',
+#        'mistake' => 'puncttuation',
+#        'correction' => 'punctuation'
+#       },
+#       3 => {
+#        'position' => 3,
+#        'error_type' => 'unnecessary_word',
+#        'mistake' => 'are',
+#        'correction' => ''
+#       },
+#       4 => {
+#        'position' => 3,
+#        'error_type' => 'punctuation',
+#        'mistake' => '.',
+#        'correction' => '?'
+#       }
+#     }
+cc.mistakes[4]['correction']
+# => '?'
+cc.mistakes[1]['mistake']
+# => ','
+```
+#### Mistake Report
+The mistake report method returns a hash containing the number of mistakes for each error type.
+```ruby
+os = "is the, puncttuation are wrong."
+cs = "Is the punctuation wrong?"
+cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
+cc.mistake_report
+# => {
+#      'missing_word'     => 0,
+#      'unnecessary_word' => 1,
+#      'spelling'         => 1,
+#      'verb'             => 0,
+#      'punctuation'      => 2,
+#      'word_order'       => 0,
+#      'capitalization'   => 1,
+#      'duplicate_word'   => 0,
+#      'word_choice'      => 0,
+#      'pluralization'    => 0,
+#      'possessive'       => 0,
+#      'stylistic_choice' => 0
+#    }
+cc.mistake_report['punctuation']
+# => 2
+```
+#### Number of Mistakes
+The number of mistakes method returns the total number of mistakes in the original sentence.
+```ruby
+os = "is the, puncttuation are wrong."
+cs = "Is the punctuation wrong?"
+cc = ChatCorrect.new(original_sentence: os, corrected_sentence: cs)
+cc.number_of_mistakes
+# => 5
+```
+## Contributing
+1. Fork it ( https://github.com/diasks2/chat_correct/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
+## License
+The MIT License (MIT)
+Copyright (c) 2015 Kevin S. Dias
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/Rakefile ADDED Viewed

@@ -0,0 +1,4 @@
+require 'bundler/gem_tasks'
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec)
+task :default => :spec

data/chat_correct.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
# coding: utf-8
# Gem specification for chat_correct.
#
# Puts the gem's own lib/ directory on the load path so the version constant
# can be read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'chat_correct/version'

Gem::Specification.new do |spec|
  spec.name          = "chat_correct"
  spec.version       = ChatCorrect::VERSION
  spec.authors       = ["Kevin S. Dias"]
  spec.email         = ["diasks2@gmail.com"]
  spec.summary       = %q{Returns the errors and error types when an incorrect English sentence is diffed with a correct English sentence.}
  spec.description   = %q{A Ruby gem to help students improve their English. A teacher can correct a student's sentence and this gem will automatically provide information on the type of error (i.e. punctuation, spelling, etc.), the placement of the errors, and the number of errors.}
  # Previously an empty string; this is the repository the README links to.
  spec.homepage      = "https://github.com/diasks2/chat_correct"
  spec.license       = "MIT"

  # Package every file tracked by git; files under bin/ become executables and
  # files under test/, spec/ or features/ are registered as test files.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"

  spec.add_runtime_dependency "levenshtein-ffi"
  spec.add_runtime_dependency "linguistics", "~> 2.0.2"
  spec.add_runtime_dependency "verbs"
  spec.add_runtime_dependency "engtagger"
end

data/lib/chat_correct/capitalization.rb ADDED Viewed

@@ -0,0 +1,13 @@
module ChatCorrect
  # Decides whether two tokens are the same word written with different
  # letter case (e.g. "is" vs "Is").
  class Capitalization
    attr_reader :token_a, :token_b

    # @param token_a [String, nil] first token to compare
    # @param token_b [String, nil] second token to compare
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when the tokens differ only by letter case;
    #   false when they are identical, are different words, or either is nil
    def capitalization_error?
      # A missing token cannot be a capitalization variant of anything; without
      # this guard a nil token raised NoMethodError on #downcase.
      return false if token_a.nil? || token_b.nil?

      token_a != token_b && token_a.downcase.eql?(token_b.downcase)
    end
  end
end

data/lib/chat_correct/combine_multi_word_verbs.rb ADDED Viewed

@@ -0,0 +1,51 @@
+require 'engtagger'
module ChatCorrect
  # Merges an auxiliary verb with the word(s) that follow it so that a
  # multi-word verb phrase ("was going", "did not see", "will have gone") is
  # returned as a single token.
  class CombineMultiWordVerbs
    # Auxiliaries that may absorb the next word. The "ƪ" forms are kept
    # byte-for-byte; presumably the tokenizer substitutes "ƪ" for apostrophes
    # (Contraction maps it back to "'") -- confirm against Tokenize.
    TOKEN_ARRAY = ['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'could', 'did', 'arenƪt', 'wasnƪt', 'werenƪt', 'havenƪt', 'hasnƪt', 'hadnƪt', 'wouldnƪt', 'couldnƪt', 'didnƪt'].freeze
    # Auxiliaries that may absorb a following "not" plus a verb.
    TOKEN_ARRAY_2 = ['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'did', 'could'].freeze
    # Two-word auxiliary heads that absorb a third, verb-tagged word.
    PERFECT_PAIRS = [%w[will have], %w[would have], %w[had been]].freeze

    attr_reader :text

    # @param text [String] the sentence to tokenize and combine
    def initialize(text:)
      @text = text
    end

    # Tokenizes the text and joins each auxiliary with the words it governs.
    #
    # Token i is matched against the i-th whitespace-separated chunk of the
    # tagger output, exactly as before.
    #
    # @return [Array<String>] tokens, with each combined phrase joined by
    #   single spaces and the absorbed words removed
    def combine
      tagger = EngTagger.new
      tokens = ChatCorrect::Tokenize.new(text: text).tokenize
      tagged = tagger.add_tags(text).split
      absorbed = {} # position => word that was merged into an earlier token

      tokens.each_with_index do |token, index|
        length = phrase_length(tokens, tagged, token, index)
        next unless length

        (1...length).each { |offset| absorbed[index + offset] = tokens[index + offset] }
        tokens[index] = tokens[index, length].join(' ')
      end

      # Drop absorbed words by position, not by value: deleting by value also
      # removed every other occurrence of the same word in the sentence.
      # A position is kept if it has since become the head of its own phrase
      # (its content no longer equals the absorbed word), as it was before.
      tokens.reject.with_index do |token, index|
        absorbed.key?(index) && absorbed[index].eql?(token)
      end
    end

    private

    # Number of tokens (2 or 3) that form a verb phrase starting at +index+,
    # or nil when the token there does not start one.
    def phrase_length(tokens, tagged, token, index)
      following = tokens[index + 1]
      return nil if following.nil?

      third_is_verb = !tokens[index + 2].nil? && verb_tag?(tagged[index + 2])

      if PERFECT_PAIRS.include?([token, following]) && third_is_verb
        3
      elsif TOKEN_ARRAY_2.include?(token) && following.eql?('not') && third_is_verb
        3
      elsif TOKEN_ARRAY.include?(token) &&
            (verb_tag?(tagged[index + 1]) || adverb_tag?(tagged[index + 1])) &&
            !follows_combined_phrase?(tokens, index) &&
            following != 'had'
        2
      end
    end

    # True when the character after the leading "<" of a tagged word is "v".
    # Assumes the tagger emits "<tag>word</tag>" chunks -- confirm against
    # EngTagger#add_tags. A missing chunk is treated as "not a verb"; the old
    # `[1..-1][0]` probe raised NoMethodError on an empty string instead.
    def verb_tag?(tagged_word)
      tagged_word.to_s[1].eql?('v')
    end

    # True when the two characters after the leading "<" are "rb".
    def adverb_tag?(tagged_word)
      tagged_word.to_s[1..2].eql?('rb')
    end

    # True when the previous token is already a combined phrase (contains a
    # space). The first token has no predecessor; `tokens[index - 1]` used to
    # wrap around to the last token there. Uses core String#include? rather
    # than ActiveSupport's String#exclude?, which this file does not require.
    def follows_combined_phrase?(tokens, index)
      index > 0 && tokens[index - 1].include?(' ')
    end
  end
end

data/lib/chat_correct/common_verb_mistake.rb ADDED Viewed

@@ -0,0 +1,62 @@
module ChatCorrect
  # Recognises a fixed list of common wrong past-tense forms ("flied",
  # "runned", ...) paired with their expected corrections.
  class CommonVerbMistake
    # Wrong form => expected correction. Lookup is case-sensitive.
    COMMON_VERB_MISTAKES =
      { "flied" => "flew",
        "weared" => "wore",
        "finded" => "found",
        "fighted" => "fought",
        "clinged" => "clung",
        "bleeded" => "bled",
        "bringed" => "brought",
        "catched" => "caught",
        "cutted" => "cut",
        "feeled" => "felt",
        "drived" => "drove",
        "falled" => "fell",
        "forgetted" => "forgot",
        "freezed" => "froze",
        "gived" => "gave",
        "heared" => "heard",
        "hurted" => "hurt",
        "keeped" => "kept",
        "knowed" => "knew",
        "leaved" => "left",
        "losed" => "lost",
        "meaned" => "meant",
        "quited" => "quit",
        "quitted" => "quit",
        "ridded" => "rode",
        "runned" => "ran",
        "rised" => "rose",
        "seed" => "saw",
        "singed" => "sang",
        "sitted" => "sat",
        "sited" => "sat",
        "speaked" => "spoke",
        "standed" => "stood",
        "sweared" => "swore",
        "swimmed" => "swam",
        "thinked" => "thought",
        "telled" => "told",
        "taked" => "took",
        "stringed" => "strung",
        "teached" => "taught",
        "waked" => "woke",
        "weeped" => "wept",
        "winned" => "won",
        "writed" => "wrote",
        "weaved" => "wove",
        "gots" => "have"
      }.freeze

    attr_reader :token_a, :token_b

    # @param token_a [String, nil] one token of the pair
    # @param token_b [String, nil] the other token of the pair
    def initialize(token_a:, token_b:)
      @token_a = token_a
      @token_b = token_b
    end

    # @return [Boolean] true when one token is a listed wrong form and the
    #   other is its listed correction, in either order
    def exists?
      listed_pair?(token_a, token_b) || listed_pair?(token_b, token_a)
    end

    private

    # True when +mistake+ is a key of the table and maps to +correction+.
    # The explicit key check matters: a miss returns nil, and nil.eql?(nil) is
    # true, so an unknown word paired with a nil token used to be reported as
    # a known verb mistake.
    def listed_pair?(mistake, correction)
      COMMON_VERB_MISTAKES.key?(mistake) &&
        COMMON_VERB_MISTAKES[mistake].eql?(correction)
    end
  end
end

data/lib/chat_correct/contraction.rb ADDED Viewed

@@ -0,0 +1,103 @@
module ChatCorrect
  # Decides whether +contraction+ is the contracted form of the word pair
  # +token_a+ +token_b+ (e.g. "do" + "not" => "don't", "I" + "am" => "I'm").
  class Contraction
    # Negations whose contracted form is not simply "<word>n't".
    # "can" is listed because "can't" drops an "n" and therefore never
    # matched the regular "<word>n't" rule.
    NOT_CONTRACTION = {
      'am' => "ain't",
      'can' => "can't",
      'do' => "don't",
      'will' => "won't",
      'shall' => "shan't",
      'is' => "isn't"
    }.freeze

    # Contractions keyed by the exact [token_a, token_b] pair.
    IRREGULAR_CONTRACTION = {
      ['is', 'not'] => "ain't",
      ['madam', nil] => "ma'am",
      ['never-do-well', nil] => "ne'er-do-well",
      ['cat-of-nine-tails', nil] => "cat-o'-nine-tails",
      ['jack-of-the-lantern', nil] => "jack-o'-lantern",
      ['will-of-the-wisp', nil] => "will-o'-the-wisp"
    }.freeze

    # Second word => suffix appended to the first word when contracted.
    AUXILIARY_SUFFIX = {
      'are' => "'re",
      'is' => "'s",
      'does' => "'s",
      'has' => "'s",
      'have' => "'ve",
      'had' => "'d",
      'did' => "'d",
      'would' => "'d",
      'will' => "'ll"
    }.freeze

    attr_reader :token_a, :token_b, :contraction

    # All values are stored lower-cased; "ƪ" in the contraction is replaced by
    # an apostrophe. When +token_a+ or +contraction+ is nil nothing is stored,
    # every reader returns nil and #contraction? is false.
    #
    # @param token_a [String, nil] first word of the expanded form
    # @param token_b [String, nil] second word of the expanded form
    # @param contraction [String, nil] candidate contracted form
    def initialize(token_a:, token_b:, contraction:)
      return if token_a.nil? || contraction.nil?

      @token_a = token_a.downcase
      @token_b = token_b && token_b.downcase
      @contraction = contraction.downcase.gsub(/ƪ/, "'")
    end

    # @return [Boolean] true when the contraction matches any known pattern
    def contraction?
      return false if token_a.nil? || contraction.nil?

      negation? || irregular? || let_us? || i_am? || auxiliary? ||
        of_contraction? || it_contraction? || them_contraction?
    end

    private

    # "<word> not" => listed irregular form or "<word>n't".
    def negation?
      return false unless token_b.eql?('not')

      NOT_CONTRACTION[token_a].eql?(contraction) || suffixed_with?("n't")
    end

    def irregular?
      IRREGULAR_CONTRACTION[[token_a, token_b]].eql?(contraction)
    end

    # "let us" => "let's".
    def let_us?
      token_a.eql?('let') && token_b.eql?('us') && suffixed_with?("'s")
    end

    # "i am" => "i'm".
    def i_am?
      token_a.eql?('i') && token_b.eql?('am') && suffixed_with?("'m")
    end

    # "<word> are/is/does/has/have/had/did/would/will" => "<word>'<suffix>".
    def auxiliary?
      suffix = AUXILIARY_SUFFIX[token_b]
      !suffix.nil? && suffixed_with?(suffix)
    end

    # "of" => "o'", or "o' <token_b>".
    def of_contraction?
      token_a.eql?('of') && (contraction.eql?("o'") ||
        contraction.partition("o' ")[-1].eql?(token_b))
    end

    # "it <word>" => "'t<word>" (e.g. "'twas").
    def it_contraction?
      token_a.eql?('it') && contraction.partition("'t")[-1].eql?(token_b)
    end

    # "<word> them" => "<word> 'em". The separator must actually be present:
    # String#partition returns the whole string as the head when it is not,
    # which used to accept a "contraction" identical to token_a.
    def them_contraction?
      head, separator, = contraction.partition(" 'em")
      token_b.eql?('them') && !separator.empty? && head.eql?(token_a)
    end

    # True when the contraction is exactly token_a followed by +suffix+.
    # The previous partition-based test also passed when the suffix was
    # absent and the contraction merely equalled token_a.
    def suffixed_with?(suffix)
      contraction.eql?("#{token_a}#{suffix}")
    end
  end
end