clausewitz-spelling 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1929f3ff6bf94eaccc2d2e084bc8ddfb52cd3cd
4
- data.tar.gz: 1e117b63b0e90bdb6bedb8d3d4e4286787829f04
3
+ metadata.gz: 8fe17c11c32b6260c764e55e2bd4d508e37bdc2f
4
+ data.tar.gz: 61083f1bc484d2a8b8dbf960b5226b947e17a520
5
5
  SHA512:
6
- metadata.gz: 994b5c039f5be4b29420ea440cc8ed48f89e05f8cd0afb4794837335a245f6eb77e96bc4e7f53d1801fd6c6d6ca20a79b270577451766448d096444402dd722e
7
- data.tar.gz: 1a56c0ca573791d2b9bb9d8edade88ca087fd49607d58c63f7f593c307b82a073fd6231638fa6eb3103388b7c4d83142bafcf51721c92a200a645a6bd735a3f1
6
+ metadata.gz: 42e7327da7a213edae016e730acb1177415453e3d87aa9771dda061b36f7fdfd01fe05966c8a35ef1fb4a650097025ad8de2c7c5f9ac6917361059d21ac02d62
7
+ data.tar.gz: a7199d624932691fdf0827c34bd7bf1cda2ff112817f27026cec2efc1bb03b5973cb8eb5ca2728f4e11f233f5ab02601db6426a9a768c9b2c2e2cc60c6463f8d
@@ -1,11 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clausewitz-spelling (0.1.16)
4
+ clausewitz-spelling (0.1.17)
5
5
  colorize
6
6
  damerau-levenshtein
7
7
  ffi-aspell
8
8
  optimist
9
+ pragmatic_tokenizer
9
10
 
10
11
  GEM
11
12
  remote: https://rubygems.org/
@@ -19,6 +20,8 @@ GEM
19
20
  ffi
20
21
  method_source (0.9.2)
21
22
  optimist (3.0.0)
23
+ pragmatic_tokenizer (3.0.7)
24
+ unicode
22
25
  pry (0.12.2)
23
26
  coderay (~> 1.1.0)
24
27
  method_source (~> 0.9.0)
@@ -36,6 +39,7 @@ GEM
36
39
  diff-lcs (>= 1.2.0, < 2.0)
37
40
  rspec-support (~> 3.8.0)
38
41
  rspec-support (3.8.0)
42
+ unicode (0.4.4.4)
39
43
 
40
44
  PLATFORMS
41
45
  ruby
@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.add_dependency "optimist"
30
30
  spec.add_dependency "colorize"
31
31
  spec.add_dependency "damerau-levenshtein"
32
+ spec.add_dependency "pragmatic_tokenizer"
32
33
  end
@@ -5,6 +5,7 @@ require 'set'
5
5
  require 'tmpdir'
6
6
  require 'yaml'
7
7
  require 'damerau-levenshtein'
8
+ require 'pragmatic_tokenizer'
8
9
  require 'clausewitz/localisation'
9
10
  require 'clausewitz/spelling/results'
10
11
 
@@ -53,6 +54,7 @@ module Clausewitz; module Spelling
53
54
  aspell_checker = load_aspell_checker(lang)
54
55
  spellcheck_ignore = entries&.delete('spellcheck_ignore')
55
56
  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
57
+ ignored_keys << 'spellcheck_ignore'
56
58
  return IgnoredLangResult.new(lang) if ignored_keys.include?('all')
57
59
  return LangResults.new(lang, []) unless entries
58
60
  checks = entries.map do |key, entry|
@@ -75,30 +77,39 @@ module Clausewitz; module Spelling
75
77
  # Remove other localisation bits we don't care about.
76
78
  entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
77
79
 
78
- # We should also remove punctuation that is never part of words, like
79
- # exclamation points, commas, semi-colons, and question marks.
80
- # We should be using proper apostrophes for possessives in our loc.
81
- entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
82
-
83
- # If a word has one full stop at the end with no other full stops
84
- # elsewhere in the word, it's probably an acronym or initialism like
85
- # U.S.A. and so we should avoid stripping it. Otherwise, it's probably
86
- # the end of a sentence and can be ignored.
87
- words = entry.split(/\s|—/)
88
- words.map! do |word|
89
- word.sub!(/^'/, '')
90
- word.sub!(/'?,?'?$/, '')
91
-
92
- if word.end_with?('...')
93
- word.sub(/\.\.\.$/, '')
94
- elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
95
- word.sub(/\.$/, '')
96
- elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
97
- word.sub(/\.$/, '')
98
- else
99
- word
100
- end
101
- end.join(" ")
80
+ ## We should also remove punctuation that is never part of words, like
81
+ ## exclamation points, commas, semi-colons, and question marks.
82
+ ## We should be using proper apostrophes for possessives in our loc.
83
+ #entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
84
+
85
+ ## If a word has one full stop at the end with no other full stops
86
+ ## elsewhere in the word, it's probably an acronym or initialism like
87
+ ## U.S.A. and so we should avoid stripping it. Otherwise, it's probably
88
+ ## the end of a sentence and can be ignored.
89
+ #words = entry.split(/\s|—/)
90
+ #words.map! do |word|
91
+ # word.sub!(/^'/, '')
92
+ # word.sub!(/'?,?'?$/, '')
93
+
94
+ # if word.end_with?('...')
95
+ # word.sub(/\.\.\.$/, '')
96
+ # elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
97
+ # word.sub(/\.$/, '')
98
+ # elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
99
+ # word.sub(/\.$/, '')
100
+ # else
101
+ # word
102
+ # end
103
+ #end.join(" ")
104
+
105
+ opts = {
106
+ language: :en,
107
+ punctuation: :none,
108
+ downcase: false
109
+ }
110
+ words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
111
+ words = words.map { |word| word.split('—') }.flatten(1)
112
+
102
113
 
103
114
  checks = words.map { |word| check_word(checker, wordlist, word) }.compact
104
115
  EntryResults.new(key, checks)
@@ -1,5 +1,5 @@
1
1
  module Clausewitz
2
2
  module Spelling
3
- VERSION = "0.1.16"
3
+ VERSION = "0.1.17"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clausewitz-spelling
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Chappell
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pragmatic_tokenizer
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  description:
126
140
  email:
127
141
  - wtchappell@gmail.com