ruby-spellchecker 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 640ae53e5d025e1a9721991025c156f0572121defc5a0c12e8b3261ec0fae6ff
4
- data.tar.gz: 504ac98dd3b4d985f910ad662f5e02b5feb54bb6986f9a50597cdee36893e97f
3
+ metadata.gz: ef027e0b01226b4df2d1ba0d4e4d63b9b28ee84070689d747f5573e52c94014b
4
+ data.tar.gz: 5daeaa089531bd755434545304c96a21501b0f2f5975611a9fc32f38cec92ee2
5
5
  SHA512:
6
- metadata.gz: 495d4d161b1ebeebfb45086403fc00588098e9a9b7713627495a09f77ab367c7d77a29771c0c94f13704f7d104331fa364abd2b976043907662b49b3bb51d9e2
7
- data.tar.gz: b0841b2ef09d09291da88cec2b4b8a871b78137db3b908c725cdcd3ff86d3d03cac2ffbd9aed9125f9316797c1684cadb7dd6ef651ce3bf9ad1c161b81e33cde
6
+ metadata.gz: 62237078c65b536f9ca986324f532ecf439f511184bf8ad738163b0681bb7f6cd66c783d5b1e63086d15802ea2c1ceb304ea35ddee9449fc6b3592c59b301d85
7
+ data.tar.gz: ff1bc1100afcdd792dcc600e8c42b696d2703bed837c2ea4fbccf2acdf8a7284bfb4b62341a72a3a675232a48e7131ebf025db4b3d32fb39aa0d7ff6cd629264
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Ruby Spellchecker
2
2
 
3
+ Fast ruby spelling and grammar checker that can be used for autocorrection. Used by [SiteInspector](http://github.com/siteinspector/siteinspector).
4
+
3
5
  ## Installation
4
6
 
5
7
  Add this line to your application's Gemfile:
@@ -693,7 +693,6 @@ corona virus,coronavirus
693
693
  cote chalonnaise,Côte Chalonnaise
694
694
  cote d'argent,Côte d'Argent
695
695
  cote d'azur,Côte d'Azur
696
- cote d'ivoire,Côte d'Ivoire
697
696
  cote d'opale,Côte d'Opale
698
697
  cote d'or,Côte d'Or
699
698
  cote d`argent,Côte d`Argent
@@ -2012,11 +2011,6 @@ salvador dali,Salvador Dalí
2012
2011
  sam elliot,Sam Elliott
2013
2012
  san luis potosi,San Luis Potosí
2014
2013
  sao paolo,São Paulo
2015
- sao paulo,São Paulo
2016
- sao tome and principe,São Tomé and Príncipe
2017
- sao tome and príncipe,São Tomé and Príncipe
2018
- sao tomé and principe,São Tomé and Príncipe
2019
- sao tomé and príncipe,São Tomé and Príncipe
2020
2014
  sau paolo,São Paulo
2021
2015
  sau paulo,São Paulo
2022
2016
  saudia arabia,saudi Arabia
@@ -21157,7 +21157,6 @@ complexty,complexity
21157
21157
  complexy,complexity
21158
21158
  compliacted,complicate
21159
21159
  compliactions,complication
21160
- compliancy,compliance
21161
21160
  complianed,compliance
21162
21161
  complians,complains
21163
21162
  compliants,complaints
@@ -89409,7 +89408,6 @@ reey,really
89409
89408
  refacted,refactored
89410
89409
  refactor's,refactored
89411
89410
  refactorig,refactoring
89412
- refactorings,refactors
89413
89411
  refactorng,refactoring
89414
89412
  refactorsing,refactoring
89415
89413
  refarence,references
@@ -6,6 +6,9 @@ module Spellchecker
6
6
  ABBREVIATION_REGEXP = /\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze
7
7
 
8
8
  LENGTH_LIMIT = 2
9
+ ABBREVIATION_LENGTH = 2
10
+ NUMBER_SHORTENING_SUFFIX = 'th'
11
+ SHORTENINGS = Set.new(%w[ver]).freeze
9
12
 
10
13
  module_function
11
14
 
@@ -20,7 +23,7 @@ module Spellchecker
20
23
 
21
24
  return unless correction
22
25
  return if PROPER_NAME_REGEXP.match?(word)
23
- return if ABBREVIATION_REGEXP.match?(word)
26
+ return if abbreviation?(token) || shortening?(token)
24
27
  return if Dictionaries::EnglishWords.include?(Utils.replace_quote(word))
25
28
 
26
29
  return if token.capital? && proper_noun?(word)
@@ -38,5 +41,25 @@ module Spellchecker
38
41
  Dictionaries::CompanyNames.include?(word) ||
39
42
  Dictionaries::UsToponyms.include?(word)
40
43
  end
44
+
45
+ # @param token [Spellchecker::Tokenizer::Token]
46
+ # @return [Boolean]
47
+ def abbreviation?(token)
48
+ return true if ABBREVIATION_REGEXP.match?(token.text)
49
+ return true if token.text.length <= ABBREVIATION_LENGTH &&
50
+ !token.prev.word? && !token.next.word?
51
+
52
+ false
53
+ end
54
+
55
+ # @param token [Spellchecker::Tokenizer::Token]
56
+ # @return [Boolean]
57
+ def shortening?(token)
58
+ return true if token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
59
+ return true if SHORTENINGS.include?(token.downcased) &&
60
+ (token.next.dot? || token.next.digit?)
61
+
62
+ false
63
+ end
41
64
  end
42
65
  end
@@ -11,7 +11,7 @@ module Spellchecker
11
11
  # @return [Hash<Array<String>, String>]
12
12
  def all
13
13
  @all || MUTEX.synchronize do
14
- @all ||= CSV.parse(PATH.read).to_h.transform_keys { |e| e.split(' ') }
14
+ @all ||= CSV.parse(PATH.read).to_h.transform_keys(&:split)
15
15
  end
16
16
  end
17
17
 
@@ -16,7 +16,7 @@ module Spellchecker
16
16
  SIMPLE_POST = ['!', '?', ',', ':', ';', '.'].freeze
17
17
  PAIR_PRE = ['(', '{', '[', '<', '«', '„', '‘'].freeze
18
18
  PAIR_POST = [')', '}', ']', '>', '»', '“', '’'].freeze
19
- PRE_N_POST = ['"', "'", '`'].freeze
19
+ PRE_N_POST = ['"', "'", '`', '*'].freeze
20
20
 
21
21
  SPLITTABLES = SIMPLE_PRE + SIMPLE_POST + PAIR_PRE + PAIR_POST + PRE_N_POST
22
22
 
@@ -43,6 +43,21 @@ module Spellchecker
43
43
  @capital ||= text.match?(/\A[A-Z]/)
44
44
  end
45
45
 
46
+ # @return [Boolean]
47
+ def word?
48
+ @word ||= text.length > 1 || text.match?(/\w/)
49
+ end
50
+
51
+ # @return [Boolean]
52
+ def digit?
53
+ @digit ||= text.match?(/\A\d+\z/)
54
+ end
55
+
56
+ # @return [Boolean]
57
+ def dot?
58
+ @dot ||= text == Tokenizer::DOT
59
+ end
60
+
46
61
  # @return [String]
47
62
  def downcased
48
63
  @downcased ||= text.downcase
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spellchecker
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.4'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-spellchecker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pete Matsyburka
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-28 00:00:00.000000000 Z
11
+ date: 2020-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec