RubyGems - clausewitz-spelling - Versions diffs - 0.1.16 → 0.1.17 - Mend

clausewitz-spelling 0.1.16 → 0.1.17

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/Gemfile.lock +5 -1
data/clausewitz-spelling.gemspec +1 -0
data/lib/clausewitz/spelling/checker.rb +35 -24
data/lib/clausewitz/spelling/version.rb +1 -1
metadata +15 -1

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f1929f3ff6bf94eaccc2d2e084bc8ddfb52cd3cd
-  data.tar.gz: 1e117b63b0e90bdb6bedb8d3d4e4286787829f04
+  metadata.gz: 8fe17c11c32b6260c764e55e2bd4d508e37bdc2f
+  data.tar.gz: 61083f1bc484d2a8b8dbf960b5226b947e17a520
 SHA512:
-  metadata.gz: 994b5c039f5be4b29420ea440cc8ed48f89e05f8cd0afb4794837335a245f6eb77e96bc4e7f53d1801fd6c6d6ca20a79b270577451766448d096444402dd722e
-  data.tar.gz: 1a56c0ca573791d2b9bb9d8edade88ca087fd49607d58c63f7f593c307b82a073fd6231638fa6eb3103388b7c4d83142bafcf51721c92a200a645a6bd735a3f1
+  metadata.gz: 42e7327da7a213edae016e730acb1177415453e3d87aa9771dda061b36f7fdfd01fe05966c8a35ef1fb4a650097025ad8de2c7c5f9ac6917361059d21ac02d62
+  data.tar.gz: a7199d624932691fdf0827c34bd7bf1cda2ff112817f27026cec2efc1bb03b5973cb8eb5ca2728f4e11f233f5ab02601db6426a9a768c9b2c2e2cc60c6463f8d

data/Gemfile.lock CHANGED

@@ -1,11 +1,12 @@
 PATH
   remote: .
   specs:
-    clausewitz-spelling (0.1.16)
+    clausewitz-spelling (0.1.17)
       colorize
       damerau-levenshtein
       ffi-aspell
       optimist
+      pragmatic_tokenizer
 GEM
   remote: https://rubygems.org/
@@ -19,6 +20,8 @@ GEM
       ffi
     method_source (0.9.2)
     optimist (3.0.0)
+    pragmatic_tokenizer (3.0.7)
+      unicode
     pry (0.12.2)
       coderay (~> 1.1.0)
       method_source (~> 0.9.0)
@@ -36,6 +39,7 @@ GEM
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.8.0)
     rspec-support (3.8.0)
+    unicode (0.4.4.4)
 PLATFORMS
   ruby

data/clausewitz-spelling.gemspec CHANGED

@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
   spec.add_dependency "optimist"
   spec.add_dependency "colorize"
   spec.add_dependency "damerau-levenshtein"
+  spec.add_dependency "pragmatic_tokenizer"
 end

data/lib/clausewitz/spelling/checker.rb CHANGED

@@ -5,6 +5,7 @@ require 'set'
 require 'tmpdir'
 require 'yaml'
 require 'damerau-levenshtein'
+require 'pragmatic_tokenizer'
 require 'clausewitz/localisation'
 require 'clausewitz/spelling/results'
@@ -53,6 +54,7 @@ module Clausewitz; module Spelling
       aspell_checker = load_aspell_checker(lang)
       spellcheck_ignore = entries&.delete('spellcheck_ignore')
       ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
+      ignored_keys << 'spellcheck_ignore'
       return IgnoredLangResult.new(lang) if ignored_keys.include?('all')
       return LangResults.new(lang, []) unless entries
       checks = entries.map do |key, entry|
@@ -75,30 +77,39 @@ module Clausewitz; module Spelling
       # Remove other localisation bits we don't care about.
       entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
-      # We should also remove punctuation that is never part of words, like
-      # exclamation points, commas, semi-colons, and question marks.
-      # We should be using proper apostrophes for possessives in our loc.
-      entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
-      # If a word has one full stop at the end with no other full stops
-      # elsewhere in the word, it's probably an acronym or initialism like
-      # U.S.A. and so we should avoid stripping it. Otherwise, it's probably
-      # the end of a sentence and can be ignored.
-      words = entry.split(/\s|—/)
-      words.map! do |word|
-        word.sub!(/^'/, '')
-        word.sub!(/'?,?'?$/, '')
-        if word.end_with?('...')
-          word.sub(/\.\.\.$/, '')
-        elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
-          word.sub(/\.$/, '')
-        elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
-          word.sub(/\.$/, '')
-        else
-          word
-        end
-      end.join(" ")
+      ## We should also remove punctuation that is never part of words, like
+      ## exclamation points, commas, semi-colons, and question marks.
+      ## We should be using proper apostrophes for possessives in our loc.
+      #entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
+      ## If a word has one full stop at the end with no other full stops
+      ## elsewhere in the word, it's probably an acronym or initialism like
+      ## U.S.A. and so we should avoid stripping it. Otherwise, it's probably
+      ## the end of a sentence and can be ignored.
+      #words = entry.split(/\s|—/)
+      #words.map! do |word|
+      #  word.sub!(/^'/, '')
+      #  word.sub!(/'?,?'?$/, '')
+      #  if word.end_with?('...')
+      #    word.sub(/\.\.\.$/, '')
+      #  elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
+      #    word.sub(/\.$/, '')
+      #  elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
+      #    word.sub(/\.$/, '')
+      #  else
+      #    word
+      #  end
+      #end.join(" ")
+      opts = {
+        language: :en,
+        punctuation: :none,
+        downcase: false
+      }
+      words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
+      words = words.map { |word| word.split('—') }.flatten(1)
       checks = words.map { |word| check_word(checker, wordlist, word) }.compact
       EntryResults.new(key, checks)

data/lib/clausewitz/spelling/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Clausewitz
   module Spelling
-    VERSION = "0.1.16"
+    VERSION = "0.1.17"
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: clausewitz-spelling
 version: !ruby/object:Gem::Version
-  version: 0.1.16
+  version: 0.1.17
 platform: ruby
 authors:
 - Will Chappell
@@ -122,6 +122,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: pragmatic_tokenizer
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email:
 - wtchappell@gmail.com