RubyGems - ruby-spellchecker - Versions diffs - 0.1.1 → 0.1.2 - Mend

ruby-spellchecker 0.1.1 → 0.1.2

Files changed (23)

checksums.yaml +4 -4
data/.github/workflows/benchmark.yml +14 -0
data/.github/workflows/rspec.yml +26 -0
data/.github/workflows/rubocop.yml +26 -0
data/.rspec +0 -1
data/.rubocop.yml +0 -7
data/README.md +71 -12
data/benchmark/benchmark.rb +29 -0
data/dictionaries/company_names.txt +0 -3
data/dictionaries/ngrams.csv +0 -8
data/dictionaries/typos.csv +5 -7
data/lib/spellchecker.rb +1 -3
data/lib/spellchecker/detect_duplicate.rb +9 -5
data/lib/spellchecker/detect_typo.rb +2 -35
data/lib/spellchecker/dictionaries/typos_list.rb +2 -2
data/lib/spellchecker/dictionaries/us_toponyms.rb +4 -3
data/lib/spellchecker/tokenizer.rb +25 -16
data/lib/spellchecker/tokenizer/token.rb +5 -0
data/lib/spellchecker/version.rb +1 -1
data/ruby-spellchecker.gemspec +4 -2
metadata +42 -12
data/.travis.yml +0 -6
data/LICENSE +0 -21

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3d7994756cdab030a88d162801d9c0bae3981a1cf0c80041769dda663c42dbdb
-  data.tar.gz: b397558b59a1cd70229916a1876c55c91c2ca703c7f8dfbda5cc81eb6f107848
+  metadata.gz: 19fe4bc1957bb2abc21b2cdba7ccc55ab33cefc7c064055d35a338a01fbd910d
+  data.tar.gz: 74f000046be2ba09622d6bf725058a4b018e7fdd81e340b9a61593709312c626
 SHA512:
-  metadata.gz: '09e6946ae6358ea3dec8e8681b23e83370a301434e32984998e36732fc8bf78f1d2d84709fb019b1d1eb28d9dbf400d943ca2254a4b67e444e84a3f611d77974'
-  data.tar.gz: d0473248fd1703f17b4e7c4061e1827bf72d82127baacdc7dddf50c93630579aa76861af06013a6f2bb5ddc89b295e46de6c730570b5c5e6b4934d03a45870b3
+  metadata.gz: 18c5dfde1bb90223e24a87da7a68c15f85b57cfd8709c8a1938d2e0c2e9dfbb12382cc99091209afd5f6d9951840113658a9c0d5e57d34a24dc394a935522481
+  data.tar.gz: 46c48cb356d5f3f825bfe3bb0ce4998e36d3b69ed233592905e9d7eee1a456c524019f8606673ef8bdbe329bf8a68ea3663c5395fdb8e6b47647379942a6dbce

data/.github/workflows/benchmark.yml ADDED

@@ -0,0 +1,14 @@
+name: Benchmark
+on: push
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Ruby 2.6.0
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 2.6.0
+      - name: Run benchmarks
+        run: ruby benchmark/benchmark.rb

data/.github/workflows/rspec.yml ADDED

@@ -0,0 +1,26 @@
+name: Rspec
+on: push
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Ruby 2.6.0
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 2.6.0
+      - name: Cache gems
+        uses: actions/cache@v1
+        with:
+          path: vendor/bundle
+          key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-gem-
+      - name: Install gems
+        run: |
+          gem install bundler
+          bundle config path vendor/bundle
+          bundle install --jobs 4 --retry 3
+      - name: Run RSpec
+        run: bundle exec rspec

data/.github/workflows/rubocop.yml ADDED

@@ -0,0 +1,26 @@
+name: Rubocop
+on: push
+jobs:
+  rubocop:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Ruby 2.6.0
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 2.6.0
+      - name: Cache gems
+        uses: actions/cache@v1
+        with:
+          path: vendor/bundle
+          key: ${{ runner.os }}-gem-${{ hashFiles('**/Gemfile.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-gem-
+      - name: Install gems
+        run: |
+          gem install bundler
+          bundle config path vendor/bundle
+          bundle install --jobs 4 --retry 3
+      - name: Run RuboCop
+        run: bundle exec rubocop

data/.rspec CHANGED

@@ -1,3 +1,2 @@
---format documentation
 --color
 --require spec_helper

data/.rubocop.yml CHANGED

@@ -1,7 +1,3 @@
-require:
-  - rubocop-performance
-  - rubocop-rails
 AllCops:
   NewCops: enable
   Exclude:
@@ -40,6 +36,3 @@ Naming/FileName:
 Naming/MethodParameterName:
   MinNameLength: 2
-Rails/SkipsModelValidations:
-  Enabled: false

data/README.md CHANGED

@@ -1,15 +1,11 @@
-# Spellchecker
-Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/spellchecker`. To experiment with that code, run `bin/console` for an interactive prompt.
-TODO: Delete this and the text above, and describe your gem
+# Ruby Spellchecker
 ## Installation
 Add this line to your application's Gemfile:
 ```ruby
-gem 'spellchecker'
+gem 'ruby-spellchecker'
 ```
 And then execute:
@@ -22,14 +18,77 @@ Or install it yourself as:
 ## Usage
-TODO: Write usage instructions here
+### Get list of errors
+```ruby
+Spellchecker.check(text)
+```
+### Autocorrection
-## Development
+```ruby
+text = <<~TEXT
+  I started my schooling as the majority did in my area, at the local
+  primarry school. I then went to the local secondarry school and
+  recieved grades in English, Maths, Phisics, Biology, Geography,
+  Art, Graphical Comunication and Philosophy of Religeon. I'll not
+  bore you with the 'A' levels and above.
+  Notice the ambigous English qualification above. It was, in truth,
+  a cource dedicated to reading "Lord of the flies" and other gems,
+  and a weak atempt at getting us to commprehend them. Luckilly my
+  middle-class upbringing gave me a head start as I was was already
+  aquainted with that sort of langauge these books used (and not just
+  the Peter and Jane books) and had read simillar books before. I will
+  never be able to put that paticular course down as much as I desire
+  to because, for all its faults, it introduced me to Steinbeck,
+  Malkovich and the wonders of Lenny, mice and pockets.
+  My education never included one iota of grammar. Lynn Truss points
+  out in "Eats, shoots and leaves" that many people were excused from
+  the rigours of learning English grammar during their schooling over
+  the last 30 or so years because the majority or decision-makers
+  decided one day that it might hinder imagination and expresion (so
+  what, I ask, happened to all those expresive and imaginative people
+  before the ruling?).
+TEXT
+corrected = Spellchecker.correct(text)
+```
+Wdiff:
-After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+```ruby
+require 'wdiff'
-To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+Wdiff.diff(text, corrected)
-## Contributing
+```
-Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/spellchecker.
+Result:
+```diff
+I started my schooling as the majority did in my area, at the local
+[-primarry-] {+primary+} school. I then went to the local [-secondarry-] {+secondary+} school and
+[-recieved-] {+received+} grades in English, Maths, [-Phisics,-] {+Physics,+} Biology, Geography,
+Art, Graphical Comunication and Philosophy of [-Religeon.-] {+Religion.+} I'll not
+bore you with the 'A' levels and above.
+Notice the [-ambigous-] {+ambiguous+} English qualification above. It was, in truth,
+a [-cource-] {+course+} dedicated to reading "Lord of the flies" and other gems,
+and a weak [-atempt-] {+attempt+} at getting us to [-commprehend-] {+comprehend+} them. [-Luckilly-] {+Luckily+} my
+middle-class upbringing gave me a head start as I was [-was-] already
+[-aquainted-] {+acquainted+} with that sort of [-langauge-] {+language+} these books used (and not just
+the Peter and Jane books) and had read [-simillar-] {+similar+} books before. I will
+never be able to put that [-paticular-] {+particular+} course down as much as I desire
+to because, for all its faults, it introduced me to Steinbeck,
+Malkovich and the wonders of Lenny, mice and pockets.
+My education never included one iota of grammar. Lynn Truss points
+out in "Eats, shoots and leaves" that many people were excused from
+the rigours of learning English grammar during their schooling over
+the last 30 or so years because the majority or decision-makers
+decided one day that it might hinder imagination and [-expresion-] {+expression+} (so
+what, I ask, happened to all those [-expresive-] {+expressive+} and imaginative people
+before the ruling?).
+```

data/benchmark/benchmark.rb ADDED

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require 'benchmark'
+require_relative '../lib/spellchecker'
+text1 = <<~TEXT
+  I started my schooling as the majority did in my area, at the local primarry school. I then went to the local secondarry school and recieved grades in English, Maths, Phisics, Biology, Geography, Art, Graphical Comunication and Philosophy of Religeon. I'll not bore you with the 'A' levels and above.
+  Notice the ambigous English qualification above. It was, in truth, a cource dedicated to reading "Lord of the flies" and other gems, and a weak atempt at getting us to commprehend them. Luckilly my middle-class upbringing gave me a head start as I was already aquainted with that sort of langauge these books used (and not just the Peter and Jane books) and had read simillar books before. I will never be able to put that paticular course down as much as I desire to because, for all its faults, it introduced me to Steinbeck, Malkovich and the wonders of Lenny, mice and pockets.
+  My education never included one iota of grammar. Lynn Truss points out in "Eats, shoots and leaves" that many people were excused from the rigours of learning English grammar during their schooling over the last 30 or so years because the majority or decision-makers decided one day that it might hinder imagination and expresion (so what, I ask, happened to all those expresive and imaginative people before the ruling?).
+  I started my schooling as the majority did in my area, at the local primary school. I then went to the local secondary school and received grades in English, Maths, Physics, Biology, Geography, Art, Graphical Communication and Philosophy of Religion. I'll not bore you with the 'A' levels and above.
+  Notice the ambiguous English qualification above. It was, in truth, a course dedicated to reading "Lord of the flies" and other gems, and a weak attempt at getting us to comprehend them. Luckily my middle-class upbringing gave me a head start as I was already acquainted with that sort of language these books used (and not just the Peter and Jane books) and had read similar books before. I will never be able to put that particular course down as much as I desire to because, for all its faults, it introduced me to Steinbeck, Malkovich and the wonders of Lenny, mice and pockets.
+  My education never included one iota of grammar. Lynn Truss points out in "Eats, shoots and leaves" that many people were excused from the rigours of learning English grammar during their schooling over the last 30 or so years because the majority or decision-makers decided one day that it might hinder imagination and expression (so what, I ask, happened to all those expressive and imaginative people before the ruling?).
+TEXT
+text2 = <<~TEXT
+  Mail Attachment Support Viewable document types (apple.com)
+  .jpg, .tiff, .gif (images); .doc and .docx (Microsoft Word); .htm and .html (web pages); .key (Keynote); .numbers (Numbers); .pages (Pages); .pdf (Preview and Adobe Acrobat); .ppt and .pptx (Microsoft PowerPoint); .txt (text); .rtf (rich text format); .vcf (contact information); .xls and .xlsx (Microsoft Excel); .zip; .ics; .usdz (USDZ-Universal).
+TEXT
+text = text1 + ([text2] * 5).join("\n")
+Spellchecker.check(text)
+Benchmark.bm do |x|
+  x.report('tokenize') { 500.times { Spellchecker::Tokenizer.call(text) } }
+  x.report('check   ') { 500.times { Spellchecker.check(text) } }
+  x.report('correct ') { 500.times { Spellchecker.correct(text) } }
+end

data/dictionaries/company_names.txt CHANGED

@@ -350588,9 +350588,6 @@ Comunicatii
 Comunicatiilor
 Comunicating
 Comunicatio
-Comunication
-Comunicational
-Comunications
 Comunicatistampa
 Comunicativa
 Comunicativas

data/dictionaries/ngrams.csv CHANGED

@@ -452,7 +452,6 @@ atlanta journal and constitution,Atlanta Journal-Constitution
 atlanta journal constitution,Atlanta Journal-Constitution
 atlanta-journal and constitution,Atlanta Journal-Constitution
 atlanta-journal constitution,Atlanta Journal-Constitution
-atlantic ocean,atlantic Ocean
 award winning,award-winning
 b'nai brith,B'nai B'rith
 b'nai b’rith,B'nai B'rith
@@ -1318,7 +1317,6 @@ in tact,intact
 in their life time,in their lifetime
 in their life-time,in their lifetime
 in united states,in the United States
-indian ocean,indian Ocean
 indira gahndi,Indira Gandhi
 indira ghandi,Indira Gandhi
 inherlife time,inher lifetime
@@ -1585,7 +1583,6 @@ lloyds of london,Lloyd's of London
 long awaited,long-awaited
 longer then,longer than
 loosing on penalties,losing on penalties
-lorem ipsum dolor sit,[default text]
 los angelas,los Angeles
 los angels,los Angeles
 los angles,los Angeles
@@ -1732,8 +1729,6 @@ mostly knowed as,mostly known as
 mostly knowed for,mostly known for
 mostly knows as,mostly known as
 mostly knows for,mostly known for
-moyen age,moyen Âge
-moyen âge,moyen Âge
 muhammed ali,Muhammad Ali
 mullerian duct,Müllerian Duct
 mullerian ducts,Müllerian Ducts
@@ -1911,7 +1906,6 @@ over sized,oversized
 over-size,oversize
 over-sized,oversized
 owning to,owing to
-pacific ocean,pacific Ocean
 palm d'or,Palme d'Or
 palm d`or,Palme d'Or
 palm d’or,Palme d'Or
@@ -2263,8 +2257,6 @@ the fist time,the first time
 the frist time,the first time
 the just the,just the
 the least the least,the least
-the on going,the ongoing
-the on-going,the ongoing
 the question how,the question of how
 the question where,the question of where
 the roughly the,roughly the

data/dictionaries/typos.csv CHANGED

@@ -8898,7 +8898,6 @@ arful,awful
 arfull,awful
 arfully,artfully
 arfument,argument
-arg,argument
 argement,argument
 argentia,argentina
 argentinia,argentina
@@ -10568,7 +10567,6 @@ attrbibutes,attributes
 attrbiutes,attributes
 attrbute,attribute
 attrbutes,attributes
-attrib,attribute
 attribbutes,attributes
 attribites,attributes
 attribte,attribute
@@ -10609,7 +10607,6 @@ attrivute,attribute
 attrocious,atrocious
 attrocities,atrocities
 attrocity,atrocity
-attrs,attributes
 attruibutes,attributes
 atttempts,attempts
 atttract,attract
@@ -20329,6 +20326,8 @@ commpletion,completion
 commplexity,complexity
 commplishion,completion
 commpm,common
+commprehend,comprehend
+commprehended,comprehended
 commpression,compression
 commptiblity,commptibility
 commpunted,competent
@@ -26494,7 +26493,8 @@ countufersey,controversy
 countuness,countenance
 couontable,countable
 coupld,couple
-cource,source
+cource,course
+primarry,primary
 cources,courses
 courcework,coursework
 courching,crouching
@@ -40347,7 +40347,6 @@ enusre,ensure
 enusres,ensures
 enusring,ensuring
 enuthic,enthusiastic
-env,environment
 enveloppe,envelope
 envelopped,envelope
 enveloppen,envelope
@@ -61708,7 +61707,6 @@ isolatuon,isolation
 isoldation,isolation
 isomorphim,isomorphism
 isomorphims,isomorphisms
-isort,frosted
 isotretioin,isotretion
 isotrop,isotope
 ispired,inspired
@@ -96580,6 +96578,7 @@ secodns,seconds
 secods,seconds
 secomdary,secondary
 secondady,secondary
+secondarry,secondary
 seconday,secondary
 seconderies,secondaries
 secondery,secondary
@@ -112694,7 +112693,6 @@ unitl,until
 unitoligist,unitologist
 unitoligists,unitologists
 unitomious,unanimous
-unittests,unit
 uniue,unique
 univeral,universal
 univeralism,universalism

data/lib/spellchecker.rb CHANGED

@@ -13,8 +13,6 @@ require_relative 'spellchecker/detect_typo'
 require_relative 'spellchecker/detect_ngram'
 module Spellchecker
-  NGRAM_NUMBER = 5
   module MistakeTypes
     ALL = [
       DUPLICATE = 'duplicate',
@@ -60,7 +58,7 @@ module Spellchecker
   # @param mistakes [Array<Spellchecker::Mistake>]
   # @return [String]
   def apply_fixes(text, mistakes)
-    mistakes_hash = mistakes.map { |m| [m.context, m.context.sub(m.text, m.correction)] }.to_h
+    mistakes_hash = mistakes.map { |m| [m.text, m.correction] }.to_h
     regexp = Regexp.union(mistakes_hash.keys)
     text.gsub(regexp, mistakes_hash)

data/lib/spellchecker/detect_duplicate.rb CHANGED

@@ -12,7 +12,8 @@ module Spellchecker
          yum yummy agar kori lai please mumble extremely
          highly root whoa knock check woof bounce bouncy
          million tut wow mola paw hubba histrio cha nom
-         chop same extra more bang big go no pom]
+         chop same extra more bang big go no pom la ah
+         ha oh ew]
     ).freeze
     SKIP_PHRASES = Set.new(['try and', 'and try', 'and again', 'again and',
@@ -46,13 +47,13 @@ module Spellchecker
       text, correction = find_duplicate(t1, t2, t3, t4)
       return unless text
-      return if t2.text.match?(/\A[A-Z]/)
+      return if t2.capital? || t3.capital?
       return if SKIP_PHRASES.include?(correction.downcase)
       return unless Dictionaries::EnglishWords.include?(t2.text)
       return if skip_phrase?(t1, t2, t3, t4)
       return if repetition?(t1, t2, t3, t4)
-      return if from_to_phrase?(t1, t2, t3, t4)
+      return if from_to_phrase?(t1, t2, t3)
       return if quoted?(t1, t2, t3, t4)
       Mistake.new(text: text, correction: correction,
@@ -79,22 +80,25 @@ module Spellchecker
       false
     end
+    # rubocop:disable Metrics/AbcSize
     def repetition?(t1, t2, t3, t4)
       return true if t1.downcased == t3.downcased && t1.downcased == t4.next.downcased
       return true if t1.prev.downcased == t2.downcased && t2.downcased == t4.downcased
+      return true if t1.prev.downcased == t1.downcased && t1.downcased == t3.downcased
       return true if t1.downcased == t2.downcased && (t1.downcased == t3.downcased ||
                                                       t1.downcased == t1.prev.downcased ||
                                                       t1.downcased == t4.downcased)
       false
     end
+    # rubocop:enable Metrics/AbcSize
     def quoted?(t1, _t2, t3, t4)
       t1.prev.text == '"' && (t3.text == '"' || t4.text == '"')
     end
-    def from_to_phrase?(t1, t2, t3, t4)
-      t1.downcased == 'from' && t3.downcased == 'to' && t2.downcased == t4.downcased
+    def from_to_phrase?(t1, t2, t3)
+      t1.prev.downcased == 'from' && t2.downcased == 'to' && t1.downcased == t3.downcased
     end
   end
 end

data/lib/spellchecker/detect_typo.rb CHANGED

@@ -4,15 +4,9 @@ module Spellchecker
   module DetectTypo
     PROPER_NAME_REGEXP = /\A(?:[a-z]+[A-Z])|(?:[A-Z]+.+[A-Z]+)|(?:[A-Z]{2,}[^A-Z]+)/.freeze
     ABBREVIATION_REGEXP = /\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze
-    MUTEX = Mutex.new
     LENGTH_LIMIT = 2
-    POSTFILTERS = {
-      'aan' => :all_english_words?,
-      'dont' => :any_english_word?
-    }.freeze
     module_function
     # @param token [Spellchecker::Tokenizer::Token]
@@ -29,12 +23,9 @@ module Spellchecker
       return if ABBREVIATION_REGEXP.match?(word)
       return if Dictionaries::EnglishWords.include?(Utils.replace_quote(word))
-      is_capital = word.match?(/\A[A-Z]/)
-      return if is_capital && proper_noun?(word)
-      return if postfilter?(token)
+      return if token.capital? && proper_noun?(word)
-      correction = correction.sub(/\S/, &:upcase) if is_capital
+      correction = correction.sub(/\S/, &:upcase) if token.capital?
       Mistake.new(text: word, correction: correction,
                   position: token.position, type: MistakeTypes::SPELLING)
@@ -47,29 +38,5 @@ module Spellchecker
         Dictionaries::CompanyNames.include?(word) ||
         Dictionaries::UsToponyms.include?(word)
     end
-    # @param token [Spellchecker::Tokenizer::Token]
-    # @return [Boolean]
-    def postfilter?(token)
-      filter = POSTFILTERS[token.downcased]
-      return false unless filter
-      !method(filter).call(token)
-    end
-    # @param token [Spellchecker::Tokenizer::Token]
-    # @return [Boolean]
-    def all_english_words?(token)
-      Dictionaries::EnglishWords.include?(token.prev.text) &&
-        Dictionaries::EnglishWords.include?(token.next.text)
-    end
-    # @param token [Spellchecker::Tokenizer::Token]
-    # @return [Boolean]
-    def any_english_word?(token)
-      Dictionaries::EnglishWords.include?(token.prev.text) ||
-        Dictionaries::EnglishWords.include?(token.next.text)
-    end
   end
 end

data/lib/spellchecker/dictionaries/typos_list.rb CHANGED

@@ -17,8 +17,8 @@ module Spellchecker
       # @param word [String]
       # @return [Boolean]
-      def include?(name)
-        !match(name).nil?
+      def include?(word)
+        !match(word).nil?
       end
       # @param word [String]

data/lib/spellchecker/dictionaries/us_toponyms.rb CHANGED

@@ -4,6 +4,7 @@ module Spellchecker
   module Dictionaries
     module UsToponyms
       MUTEX = Mutex.new
+      # https://github.com/grammakov/USA-cities-and-states
       PATH = Dictionaries.path.join('us_toponyms.csv')
       module_function
@@ -28,10 +29,10 @@ module Spellchecker
         csv = CSV.parse(PATH.read, headers: true, col_sep: '|')
         csv.each_with_object(Set.new) do |row, set|
-          set.add(row['City'])
-          set.add(row['State full'])
+          set.add(row['City']) if row['City']
+          set.add(row['State full']) if row['State full']
           set.add(row['County'].to_s.split(/\s+/).map(&:capitalize).join(' ')) unless row['County'].to_s.empty?
-          set.add(row['City alias'])
+          set.add(row['City alias']) if row['City alias']
         end
       end
     end

data/lib/spellchecker/tokenizer.rb CHANGED

@@ -10,6 +10,8 @@ module Spellchecker
     WORD_REGEXP = /[[:word:]]/.freeze
     LINEBREAK = "\n"
+    DOT = '.'
     SIMPLE_PRE = ['¿', '¡'].freeze
     SIMPLE_POST = ['!', '?', ',', ':', ';', '.'].freeze
     PAIR_PRE = ['(', '{', '[', '<', '«', '„', '‘'].freeze
@@ -22,9 +24,9 @@ module Spellchecker
     module_function
-    # rubocop:disable Metrics/AbcSize
-    # @param [String] str String to be tokenized.
-    # @return [Array<String>] Array of list.
+    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
+    # @param str [String] string to be tokenized.
+    # @return [Spellchecker::Tokenizer::List]
     def call(str)
       chars = str.chars
       pos = 0
@@ -36,33 +38,40 @@ module Spellchecker
         if char.nil?
           list << Token.new(acc.join, pos) unless acc.empty?
-          break list
+          break
         end
         if char.match?(BLANK_REGEXP)
           list << Token.new(acc.join, pos) unless acc.empty?
           acc.clear
-        elsif splitable?(char, chars[i + 1], chars[i - 1])
-          list << Token.new(acc.join, pos) unless acc.empty?
-          list << Token.new(char, i)
+        elsif splitable?(char)
+          is_next_wordchar = word_char?(chars[i + 1])
-          acc.clear
+          if acc.empty? && char == DOT && is_next_wordchar
+            pos = i
+            acc << char
+          elsif !word_char?(chars[i - 1]) || !is_next_wordchar
+            list << Token.new(acc.join, pos) unless acc.empty?
+            list << Token.new(char, i)
+            acc.clear
+          else
+            acc << char
+          end
         else
           pos = i if acc.empty?
           acc << char
         end
       end
+      list
     end
-    # rubocop:enable Metrics/AbcSize
+    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
-    # @param cur [String]
-    # @param prev [String]
-    # @param nxt [String]
+    # @param char [String]
     # @return [Boolean]
-    def splitable?(cur, prev, nxt)
-      return true if SPLITTABLES_REGEXP.match?(cur) && (!word_char?(prev) || !word_char?(nxt))
-      cur == LINEBREAK
+    def splitable?(char)
+      SPLITTABLES_REGEXP.match?(char) || char == LINEBREAK
     end
     # @param char [String]

data/lib/spellchecker/tokenizer/token.rb CHANGED

@@ -38,6 +38,11 @@ module Spellchecker
         @normalized ||= Utils.replace_quote(downcased)
       end
+      # @return [Boolean]
+      def capital?
+        @capital ||= text.match?(/\A[A-Z]/)
+      end
       # @return [String]
       def downcased
         @downcased ||= text.downcase

data/lib/spellchecker/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Spellchecker
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 end

data/ruby-spellchecker.gemspec CHANGED

@@ -25,6 +25,8 @@ Gem::Specification.new do |spec|
   spec.require_paths = ['lib']
-  spec.add_development_dependency 'rspec', '~> 3.0'
-  spec.add_development_dependency 'rubocop', '~> 1.0'
+  spec.add_development_dependency 'rspec'
+  spec.add_development_dependency 'rubocop'
+  spec.add_development_dependency 'simplecov'
+  spec.add_development_dependency 'yard'
 end

metadata CHANGED

@@ -1,43 +1,71 @@
 --- !ruby/object:Gem::Specification
 name: ruby-spellchecker
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Pete Matsyburka
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-11-15 00:00:00.000000000 Z
+date: 2020-11-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rubocop
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Ruby spelling and grammar checker that can be used for autocorrection.
 email:
 - pete.matsy@gmail.com
@@ -45,14 +73,16 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/benchmark.yml"
+- ".github/workflows/rspec.yml"
+- ".github/workflows/rubocop.yml"
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
-- ".travis.yml"
 - Gemfile
-- LICENSE
 - README.md
 - Rakefile
+- benchmark/benchmark.rb
 - bin/console
 - bin/setup
 - dictionaries/company_names.txt

data/.travis.yml DELETED

@@ -1,6 +0,0 @@
----
-language: ruby
-cache: bundler
-rvm:
-  - 2.7.1
-before_install: gem install bundler -v 2.1.4

data/LICENSE DELETED

@@ -1,21 +0,0 @@
-The MIT License (MIT)
-Copyright (c) 2020 Pete Matsyburka
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.