dwc_agent 3.0.7.0 → 3.0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 309d97f743627171e1479527de5354f5f0f172b88e3cc24f40e0cbe8bc3d1ee1
4
- data.tar.gz: 1d45999c8bf87e5ff68e3479ed24b7c9910e82f21295895fc3ab08e2f47cc301
3
+ metadata.gz: c0d83fd4dba1ddb6b9976bb1dea4c95a5365cf31b9c1afc335e035f70317a40f
4
+ data.tar.gz: 2fc6de4a6de283d9cf8d813306bb034ca67e20e684ce6136d6f2366512633d34
5
5
  SHA512:
6
- metadata.gz: ed8b465cf99cf1a63d03b4bb23ef972a840d33a2c107c66f5a2993d95071ec70a2d97e7dbbecdbc3bee16491908118684b9b35260853bc83c7825e86725b2e92
7
- data.tar.gz: f02efd62c4917e326ab111c45598f82b2d1c462d525a12753a88cacb43d5e6054e005b25a16b2522151dc06d08651a12deae3499c4d51e4f5a7bbbb474f78539
6
+ metadata.gz: d6ca8f0a7507c8a8d09b183f59ca47d97ff30a0de3e017da155e0b1e57ae5d34df4328ae9bdbb2fd4682b0d57afac7e251baebaa305fea5bc46b2886b22e5385
7
+ data.tar.gz: b9ad4777ade9052a3cd54926173c77973f899e76b3f65f43f31b45481de746e70daa4ecdcbb0b190567c27f3496d69cb5093d35522243b284ac5d4931fbe09a0
@@ -161,6 +161,10 @@ module DwcAgent
161
161
  \b(?i:via|from)\s*\b
162
162
  }x
163
163
 
164
+ POST_STRIP_TIDY = %r{
165
+ ^\s*[&,;]\s*
166
+ }x
167
+
164
168
  CHAR_SUBS = {
165
169
  '"' => '\'',
166
170
  '|' => ' | ',
@@ -199,15 +203,18 @@ module DwcAgent
199
203
  ' jr.,' => ' Jr.;',
200
204
  ' jr,' => ' Jr.;',
201
205
  '-jr' => ' Jr.',
202
- '-Jr' => ' Jr.'
206
+ '-Jr' => ' Jr.',
207
+ 'Dr.' => 'Dr. ',
208
+ 'prof.' => 'Prof. '
203
209
  }
204
210
 
205
211
  SEPARATORS = {
212
+ "^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
206
213
  "^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
207
214
  "^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
208
215
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
209
216
  "^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
210
- "([[:alpha:]]*),?\\s+(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
217
+ "([[:alpha:]]*),?\\s*(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
211
218
  "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})\\s+([[:alpha:]]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
212
219
  "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
213
220
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
@@ -219,7 +226,7 @@ module DwcAgent
219
226
  (?i:abundant)|
220
227
  (?i:adult|juvenile)|
221
228
  (?i:administra(d|t)or)|
222
- (?i:anon)|
229
+ ^(?i:anon)$|
223
230
  (?i:australian?)|
224
231
  (?i:average)|
225
232
  (?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
@@ -329,6 +336,8 @@ module DwcAgent
329
336
  "inst",
330
337
  "nomenclatural",
331
338
  "orig",
339
+ "prof",
340
+ "professional",
332
341
  "qld",
333
342
  "registration",
334
343
  "science",
@@ -346,7 +355,7 @@ module DwcAgent
346
355
  "has not"
347
356
  ]
348
357
 
349
- TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|doct(eu|o)r|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
358
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|dra\.|drª|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|doct(eu|o)r|father|cantor|vicar|père|pastor|profa\.?|profª|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
350
359
 
351
360
  APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
352
361
 
@@ -18,6 +18,7 @@ module DwcAgent
18
18
  }
19
19
  @namae = Namae::Parser.new(options)
20
20
  @strip_out_regex = Regexp.new STRIP_OUT.to_s
21
+ @tidy_remains_regex = Regexp.new POST_STRIP_TIDY.to_s
21
22
  @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
22
23
  @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
23
24
  @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
@@ -31,6 +32,7 @@ module DwcAgent
31
32
  def parse(name)
32
33
  return [] if name.nil? || name == ""
33
34
  name.gsub!(@strip_out_regex, ' ')
35
+ name.gsub!(@tidy_remains_regex, '')
34
36
  name.gsub!(Regexp.union(@char_subs_regex, @phrase_subs_regex), CHAR_SUBS.merge(PHRASE_SUBS))
35
37
  @separators.each{|k| name.gsub!(k[0], k[1])}
36
38
  name.gsub!(@residual_terminators_regex, '')
@@ -4,7 +4,7 @@ module DwcAgent
4
4
 
5
5
  MAJOR = 3
6
6
  MINOR = 0
7
- PATCH = 7
7
+ PATCH = 9
8
8
  BUILD = 0
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.7.0
4
+ version: 3.0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-02 00:00:00.000000000 Z
11
+ date: 2023-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae