ruby-spellchecker 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 640ae53e5d025e1a9721991025c156f0572121defc5a0c12e8b3261ec0fae6ff
4
- data.tar.gz: 504ac98dd3b4d985f910ad662f5e02b5feb54bb6986f9a50597cdee36893e97f
3
+ metadata.gz: ef027e0b01226b4df2d1ba0d4e4d63b9b28ee84070689d747f5573e52c94014b
4
+ data.tar.gz: 5daeaa089531bd755434545304c96a21501b0f2f5975611a9fc32f38cec92ee2
5
5
  SHA512:
6
- metadata.gz: 495d4d161b1ebeebfb45086403fc00588098e9a9b7713627495a09f77ab367c7d77a29771c0c94f13704f7d104331fa364abd2b976043907662b49b3bb51d9e2
7
- data.tar.gz: b0841b2ef09d09291da88cec2b4b8a871b78137db3b908c725cdcd3ff86d3d03cac2ffbd9aed9125f9316797c1684cadb7dd6ef651ce3bf9ad1c161b81e33cde
6
+ metadata.gz: 62237078c65b536f9ca986324f532ecf439f511184bf8ad738163b0681bb7f6cd66c783d5b1e63086d15802ea2c1ceb304ea35ddee9449fc6b3592c59b301d85
7
+ data.tar.gz: ff1bc1100afcdd792dcc600e8c42b696d2703bed837c2ea4fbccf2acdf8a7284bfb4b62341a72a3a675232a48e7131ebf025db4b3d32fb39aa0d7ff6cd629264
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Ruby Spellchecker
2
2
 
3
+ Fast ruby spelling and grammar checker that can be used for autocorrection. Used by [SiteInspector](http://github.com/siteinspector/siteinspector).
4
+
3
5
  ## Installation
4
6
 
5
7
  Add this line to your application's Gemfile:
@@ -693,7 +693,6 @@ corona virus,coronavirus
693
693
  cote chalonnaise,Côte Chalonnaise
694
694
  cote d'argent,Côte d'Argent
695
695
  cote d'azur,Côte d'Azur
696
- cote d'ivoire,Côte d'Ivoire
697
696
  cote d'opale,Côte d'Opale
698
697
  cote d'or,Côte d'Or
699
698
  cote d`argent,Côte d`Argent
@@ -2012,11 +2011,6 @@ salvador dali,Salvador Dalí
2012
2011
  sam elliot,Sam Elliott
2013
2012
  san luis potosi,San Luis Potosí
2014
2013
  sao paolo,São Paulo
2015
- sao paulo,São Paulo
2016
- sao tome and principe,São Tomé and Príncipe
2017
- sao tome and príncipe,São Tomé and Príncipe
2018
- sao tomé and principe,São Tomé and Príncipe
2019
- sao tomé and príncipe,São Tomé and Príncipe
2020
2014
  sau paolo,São Paulo
2021
2015
  sau paulo,São Paulo
2022
2016
  saudia arabia,saudi Arabia
@@ -21157,7 +21157,6 @@ complexty,complexity
21157
21157
  complexy,complexity
21158
21158
  compliacted,complicate
21159
21159
  compliactions,complication
21160
- compliancy,compliance
21161
21160
  complianed,compliance
21162
21161
  complians,complains
21163
21162
  compliants,complaints
@@ -89409,7 +89408,6 @@ reey,really
89409
89408
  refacted,refactored
89410
89409
  refactor's,refactored
89411
89410
  refactorig,refactoring
89412
- refactorings,refactors
89413
89411
  refactorng,refactoring
89414
89412
  refactorsing,refactoring
89415
89413
  refarence,references
@@ -6,6 +6,9 @@ module Spellchecker
6
6
  ABBREVIATION_REGEXP = /\A(?:[A-Z]{2,4})|(?:[A-Z][a-z])\z/.freeze
7
7
 
8
8
  LENGTH_LIMIT = 2
9
+ ABBREVIATION_LENGTH = 2
10
+ NUMBER_SHORTENING_SUFFIX = 'th'
11
+ SHORTENINGS = Set.new(%w[ver]).freeze
9
12
 
10
13
  module_function
11
14
 
@@ -20,7 +23,7 @@ module Spellchecker
20
23
 
21
24
  return unless correction
22
25
  return if PROPER_NAME_REGEXP.match?(word)
23
- return if ABBREVIATION_REGEXP.match?(word)
26
+ return if abbreviation?(token) || shortening?(token)
24
27
  return if Dictionaries::EnglishWords.include?(Utils.replace_quote(word))
25
28
 
26
29
  return if token.capital? && proper_noun?(word)
@@ -38,5 +41,25 @@ module Spellchecker
38
41
  Dictionaries::CompanyNames.include?(word) ||
39
42
  Dictionaries::UsToponyms.include?(word)
40
43
  end
44
+
45
+ # @param token [Spellchecker::Tokenizer::Token]
46
+ # @return [Boolean]
47
+ def abbreviation?(token)
48
+ return true if ABBREVIATION_REGEXP.match?(token.text)
49
+ return true if token.text.length <= ABBREVIATION_LENGTH &&
50
+ !token.prev.word? && !token.next.word?
51
+
52
+ false
53
+ end
54
+
55
+ # @param token [Spellchecker::Tokenizer::Token]
56
+ # @return [Boolean]
57
+ def shortening?(token)
58
+ return true if token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
59
+ return true if SHORTENINGS.include?(token.downcased) &&
60
+ (token.next.dot? || token.next.digit?)
61
+
62
+ false
63
+ end
41
64
  end
42
65
  end
@@ -11,7 +11,7 @@ module Spellchecker
11
11
  # @return [Hash<Array<String>, String>]
12
12
  def all
13
13
  @all || MUTEX.synchronize do
14
- @all ||= CSV.parse(PATH.read).to_h.transform_keys { |e| e.split(' ') }
14
+ @all ||= CSV.parse(PATH.read).to_h.transform_keys(&:split)
15
15
  end
16
16
  end
17
17
 
@@ -16,7 +16,7 @@ module Spellchecker
16
16
  SIMPLE_POST = ['!', '?', ',', ':', ';', '.'].freeze
17
17
  PAIR_PRE = ['(', '{', '[', '<', '«', '„', '‘'].freeze
18
18
  PAIR_POST = [')', '}', ']', '>', '»', '“', '’'].freeze
19
- PRE_N_POST = ['"', "'", '`'].freeze
19
+ PRE_N_POST = ['"', "'", '`', '*'].freeze
20
20
 
21
21
  SPLITTABLES = SIMPLE_PRE + SIMPLE_POST + PAIR_PRE + PAIR_POST + PRE_N_POST
22
22
 
@@ -43,6 +43,21 @@ module Spellchecker
43
43
  @capital ||= text.match?(/\A[A-Z]/)
44
44
  end
45
45
 
46
+ # @return [Boolean]
47
+ def word?
48
+ @word ||= text.length > 1 || text.match?(/\w/)
49
+ end
50
+
51
+ # @return [Boolean]
52
+ def digit?
53
+ @digit ||= text.match?(/\A\d+\z/)
54
+ end
55
+
56
+ # @return [Boolean]
57
+ def dot?
58
+ @dot ||= text == Tokenizer::DOT
59
+ end
60
+
46
61
  # @return [String]
47
62
  def downcased
48
63
  @downcased ||= text.downcase
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spellchecker
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.4'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-spellchecker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pete Matsyburka
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-28 00:00:00.000000000 Z
11
+ date: 2020-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec