clausewitz-spelling 0.1.16 → 0.1.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1929f3ff6bf94eaccc2d2e084bc8ddfb52cd3cd
4
- data.tar.gz: 1e117b63b0e90bdb6bedb8d3d4e4286787829f04
3
+ metadata.gz: 8fe17c11c32b6260c764e55e2bd4d508e37bdc2f
4
+ data.tar.gz: 61083f1bc484d2a8b8dbf960b5226b947e17a520
5
5
  SHA512:
6
- metadata.gz: 994b5c039f5be4b29420ea440cc8ed48f89e05f8cd0afb4794837335a245f6eb77e96bc4e7f53d1801fd6c6d6ca20a79b270577451766448d096444402dd722e
7
- data.tar.gz: 1a56c0ca573791d2b9bb9d8edade88ca087fd49607d58c63f7f593c307b82a073fd6231638fa6eb3103388b7c4d83142bafcf51721c92a200a645a6bd735a3f1
6
+ metadata.gz: 42e7327da7a213edae016e730acb1177415453e3d87aa9771dda061b36f7fdfd01fe05966c8a35ef1fb4a650097025ad8de2c7c5f9ac6917361059d21ac02d62
7
+ data.tar.gz: a7199d624932691fdf0827c34bd7bf1cda2ff112817f27026cec2efc1bb03b5973cb8eb5ca2728f4e11f233f5ab02601db6426a9a768c9b2c2e2cc60c6463f8d
@@ -1,11 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clausewitz-spelling (0.1.16)
4
+ clausewitz-spelling (0.1.17)
5
5
  colorize
6
6
  damerau-levenshtein
7
7
  ffi-aspell
8
8
  optimist
9
+ pragmatic_tokenizer
9
10
 
10
11
  GEM
11
12
  remote: https://rubygems.org/
@@ -19,6 +20,8 @@ GEM
19
20
  ffi
20
21
  method_source (0.9.2)
21
22
  optimist (3.0.0)
23
+ pragmatic_tokenizer (3.0.7)
24
+ unicode
22
25
  pry (0.12.2)
23
26
  coderay (~> 1.1.0)
24
27
  method_source (~> 0.9.0)
@@ -36,6 +39,7 @@ GEM
36
39
  diff-lcs (>= 1.2.0, < 2.0)
37
40
  rspec-support (~> 3.8.0)
38
41
  rspec-support (3.8.0)
42
+ unicode (0.4.4.4)
39
43
 
40
44
  PLATFORMS
41
45
  ruby
@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.add_dependency "optimist"
30
30
  spec.add_dependency "colorize"
31
31
  spec.add_dependency "damerau-levenshtein"
32
+ spec.add_dependency "pragmatic_tokenizer"
32
33
  end
@@ -5,6 +5,7 @@ require 'set'
5
5
  require 'tmpdir'
6
6
  require 'yaml'
7
7
  require 'damerau-levenshtein'
8
+ require 'pragmatic_tokenizer'
8
9
  require 'clausewitz/localisation'
9
10
  require 'clausewitz/spelling/results'
10
11
 
@@ -53,6 +54,7 @@ module Clausewitz; module Spelling
53
54
  aspell_checker = load_aspell_checker(lang)
54
55
  spellcheck_ignore = entries&.delete('spellcheck_ignore')
55
56
  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
57
+ ignored_keys << 'spellcheck_ignore'
56
58
  return IgnoredLangResult.new(lang) if ignored_keys.include?('all')
57
59
  return LangResults.new(lang, []) unless entries
58
60
  checks = entries.map do |key, entry|
@@ -75,30 +77,39 @@ module Clausewitz; module Spelling
75
77
  # Remove other localisation bits we don't care about.
76
78
  entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
77
79
 
78
- # We should also remove punctuation that is never part of words, like
79
- # exclamation points, commas, semi-colons, and question marks.
80
- # We should be using proper apostrophes for possessives in our loc.
81
- entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
82
-
83
- # If a word has one full stop at the end with no other full stops
84
- # elsewhere in the word, it's probably an acronym or initialism like
85
- # U.S.A. and so we should avoid stripping it. Otherwise, it's probably
86
- # the end of a sentence and can be ignored.
87
- words = entry.split(/\s|—/)
88
- words.map! do |word|
89
- word.sub!(/^'/, '')
90
- word.sub!(/'?,?'?$/, '')
91
-
92
- if word.end_with?('...')
93
- word.sub(/\.\.\.$/, '')
94
- elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
95
- word.sub(/\.$/, '')
96
- elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
97
- word.sub(/\.$/, '')
98
- else
99
- word
100
- end
101
- end.join(" ")
80
+ ## We should also remove punctuation that is never part of words, like
81
+ ## exclamation points, commas, semi-colons, and question marks.
82
+ ## We should be using proper apostrophes for possessives in our loc.
83
+ #entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
84
+
85
+ ## If a word has one full stop at the end with no other full stops
86
+ ## elsewhere in the word, it's probably an acronym or initialism like
87
+ ## U.S.A. and so we should avoid stripping it. Otherwise, it's probably
88
+ ## the end of a sentence and can be ignored.
89
+ #words = entry.split(/\s|—/)
90
+ #words.map! do |word|
91
+ # word.sub!(/^'/, '')
92
+ # word.sub!(/'?,?'?$/, '')
93
+
94
+ # if word.end_with?('...')
95
+ # word.sub(/\.\.\.$/, '')
96
+ # elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
97
+ # word.sub(/\.$/, '')
98
+ # elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
99
+ # word.sub(/\.$/, '')
100
+ # else
101
+ # word
102
+ # end
103
+ #end.join(" ")
104
+
105
+ opts = {
106
+ language: :en,
107
+ punctuation: :none,
108
+ downcase: false
109
+ }
110
+ words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
111
+ words = words.map { |word| word.split('—') }.flatten(1)
112
+
102
113
 
103
114
  checks = words.map { |word| check_word(checker, wordlist, word) }.compact
104
115
  EntryResults.new(key, checks)
@@ -1,5 +1,5 @@
1
1
  module Clausewitz
2
2
  module Spelling
3
- VERSION = "0.1.16"
3
+ VERSION = "0.1.17"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clausewitz-spelling
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Chappell
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pragmatic_tokenizer
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  description:
126
140
  email:
127
141
  - wtchappell@gmail.com