RubyGems - ruby-spellchecker - Versions diffs - 0.1.3 → 0.1.4 - Mend

ruby-spellchecker 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/README.md +2 -0
data/dictionaries/ngrams.csv +0 -6
data/dictionaries/typos.csv +0 -2
data/lib/spellchecker/detect_typo.rb +24 -1
data/lib/spellchecker/dictionaries/ngram_list.rb +1 -1
data/lib/spellchecker/tokenizer.rb +1 -1
data/lib/spellchecker/tokenizer/token.rb +15 -0
data/lib/spellchecker/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 640ae53e5d025e1a9721991025c156f0572121defc5a0c12e8b3261ec0fae6ff
-  data.tar.gz: 504ac98dd3b4d985f910ad662f5e02b5feb54bb6986f9a50597cdee36893e97f
+  metadata.gz: ef027e0b01226b4df2d1ba0d4e4d63b9b28ee84070689d747f5573e52c94014b
+  data.tar.gz: 5daeaa089531bd755434545304c96a21501b0f2f5975611a9fc32f38cec92ee2
 SHA512:
-  metadata.gz: 495d4d161b1ebeebfb45086403fc00588098e9a9b7713627495a09f77ab367c7d77a29771c0c94f13704f7d104331fa364abd2b976043907662b49b3bb51d9e2
-  data.tar.gz: b0841b2ef09d09291da88cec2b4b8a871b78137db3b908c725cdcd3ff86d3d03cac2ffbd9aed9125f9316797c1684cadb7dd6ef651ce3bf9ad1c161b81e33cde
+  metadata.gz: 62237078c65b536f9ca986324f532ecf439f511184bf8ad738163b0681bb7f6cd66c783d5b1e63086d15802ea2c1ceb304ea35ddee9449fc6b3592c59b301d85
+  data.tar.gz: ff1bc1100afcdd792dcc600e8c42b696d2703bed837c2ea4fbccf2acdf8a7284bfb4b62341a72a3a675232a48e7131ebf025db4b3d32fb39aa0d7ff6cd629264

data/README.md CHANGED

@@ -1,5 +1,7 @@
 # Ruby Spellchecker
+Fast ruby spelling and grammar checker that can be used for autocorrection. Used by [SiteInspector](http://github.com/siteinspector/siteinspector).
 ## Installation
 Add this line to your application's Gemfile:

data/dictionaries/ngrams.csv CHANGED

@@ -693,7 +693,6 @@ corona virus,coronavirus
 cote chalonnaise,Côte Chalonnaise
 cote d'argent,Côte d'Argent
 cote d'azur,Côte d'Azur
-cote d'ivoire,Côte d'Ivoire
 cote d'opale,Côte d'Opale
 cote d'or,Côte d'Or
 cote d`argent,Côte d`Argent
@@ -2012,11 +2011,6 @@ salvador dali,Salvador Dalí
 sam elliot,Sam Elliott
 san luis potosi,San Luis Potosí
 sao paolo,São Paulo
-sao paulo,São Paulo
-sao tome and principe,São Tomé and Príncipe
-sao tome and príncipe,São Tomé and Príncipe
-sao tomé and principe,São Tomé and Príncipe
-sao tomé and príncipe,São Tomé and Príncipe
 sau paolo,São Paulo
 sau paulo,São Paulo
 saudia arabia,saudi Arabia

data/dictionaries/typos.csv CHANGED

@@ -21157,7 +21157,6 @@ complexty,complexity
 complexy,complexity
 compliacted,complicate
 compliactions,complication
-compliancy,compliance
 complianed,compliance
 complians,complains
 compliants,complaints
@@ -89409,7 +89408,6 @@ reey,really
 refacted,refactored
 refactor's,refactored
 refactorig,refactoring
-refactorings,refactors
 refactorng,refactoring
 refactorsing,refactoring
 refarence,references

data/lib/spellchecker/detect_typo.rb CHANGED

@@ -6,6 +6,9 @@ module Spellchecker
     ABBREVIATION_REGEXP = /\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze
     LENGTH_LIMIT = 2
+    ABBREVIATION_LENGTH = 2
+    NUMBER_SHORTENING_SUFFIX = 'th'
+    SHORTENINGS = Set.new(%w[ver]).freeze
     module_function
@@ -20,7 +23,7 @@ module Spellchecker
       return unless correction
       return if PROPER_NAME_REGEXP.match?(word)
-      return if ABBREVIATION_REGEXP.match?(word)
+      return if abbreviation?(token) || shortening?(token)
       return if Dictionaries::EnglishWords.include?(Utils.replace_quote(word))
       return if token.capital? && proper_noun?(word)
@@ -38,5 +41,25 @@ module Spellchecker
         Dictionaries::CompanyNames.include?(word) ||
         Dictionaries::UsToponyms.include?(word)
     end
+    # @param token [Spellchecker::Tokenizer::Token]
+    # @return [Boolean]
+    def abbreviation?(token)
+      return true if ABBREVIATION_REGEXP.match?(token.text)
+      return true if token.text.length <= ABBREVIATION_LENGTH &&
+                     !token.prev.word? && !token.next.word?
+      false
+    end
+    # @param token [Spellchecker::Tokenizer::Token]
+    # @return [Boolean]
+    def shortening?(token)
+      return true if token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
+      return true if SHORTENINGS.include?(token.downcased) &&
+                     (token.next.dot? || token.next.digit?)
+      false
+    end
   end
 end

data/lib/spellchecker/dictionaries/ngram_list.rb CHANGED

@@ -11,7 +11,7 @@ module Spellchecker
       # @return [Hash<Array<String>, String>]
       def all
         @all || MUTEX.synchronize do
-          @all ||= CSV.parse(PATH.read).to_h.transform_keys { |e| e.split(' ') }
+          @all ||= CSV.parse(PATH.read).to_h.transform_keys(&:split)
         end
       end

data/lib/spellchecker/tokenizer.rb CHANGED

@@ -16,7 +16,7 @@ module Spellchecker
     SIMPLE_POST = ['!', '?', ',', ':', ';', '.'].freeze
     PAIR_PRE = ['(', '{', '[', '<', '«', '„', '‘'].freeze
     PAIR_POST = [')', '}', ']', '>', '»', '“', '’'].freeze
-    PRE_N_POST = ['"', "'", '`'].freeze
+    PRE_N_POST = ['"', "'", '`', '*'].freeze
     SPLITTABLES = SIMPLE_PRE + SIMPLE_POST + PAIR_PRE + PAIR_POST + PRE_N_POST

data/lib/spellchecker/tokenizer/token.rb CHANGED

@@ -43,6 +43,21 @@ module Spellchecker
         @capital ||= text.match?(/\A[A-Z]/)
       end
+      # @return [Boolean]
+      def word?
+        @word ||= text.length > 1 || text.match?(/\w/)
+      end
+      # @return [Boolean]
+      def digit?
+        @digit ||= text.match?(/\A\d+\z/)
+      end
+      # @return [Boolean]
+      def dot?
+        @dot ||= text == Tokenizer::DOT
+      end
       # @return [String]
       def downcased
         @downcased ||= text.downcase

data/lib/spellchecker/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Spellchecker
-  VERSION = '0.1.3'
+  VERSION = '0.1.4'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-spellchecker
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Pete Matsyburka
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-11-28 00:00:00.000000000 Z
+date: 2020-12-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec