dwc_agent 3.4.2.0 → 3.4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df1815f4816f1738bd4b1f2860e7fbe35f247667905d0dfaa6123e32588de47f
4
- data.tar.gz: 5b019b47225dcb238a1c6149b26d9d6fb2f8825e64e78f912fe4df632d98b5cc
3
+ metadata.gz: 29cbee65088adb6c5914a955a61f7fb6ff88e84fc72394cae0168edeb8652953
4
+ data.tar.gz: a79f84cd59930282009d7f86545f4ce2a726d1818693b0abd9648add0160ca41
5
5
  SHA512:
6
- metadata.gz: 5cae0d260345a64d7a0c4e9ba1d59c13cddf78f3463621cfdb4fe17dd83ea6803f5eb4bddca3d021afde7f8910570b20fd77a4bef4b2113086fb39b32c29742c
7
- data.tar.gz: 8599bd4e6e7083066e9af8803c98ebfe69c14258c81472f1ee1a7ec67e1f3733db7cf2a00f0b1b1460d08aa8a27a957afffe7ccbe5d182a4300321bebe477dd9
6
+ metadata.gz: 79b6048d32a3ccd7e8fa6c6833c5df709f7a37cd2fc3f904d4075796f669cc33fd185aedb61893648aa310bc18abad854a8b5e0687023bb9de6eae27b93aded4
7
+ data.tar.gz: 3ddd88b8c5d5a0b0725876e3758caffa20f6fcbf1b69a55b1967e1984f67f0d117501f84cb7cc5759e038d968df069c1d4dddff1c86a8b31afe427d3ab6c73ff
@@ -1,3 +1,5 @@
1
+ require "set"
2
+
1
3
  module DwcAgent
2
4
  STRIP_OUT = %r{
3
5
  (?i:acc\s?\#)|
@@ -138,7 +140,7 @@ module DwcAgent
138
140
  \s+de\s*$|
139
141
  \.{2,}$|
140
142
  [^[:alnum:][:blank:][:punct:][∣´|ǀ∣|│`~$^+|<>]] # Removes emojis from string
141
- }x
143
+ }x.freeze
142
144
 
143
145
  SPLIT_BY = %r{
144
146
  [;,]{2,} | # Multiple semicolons or commas
@@ -170,14 +172,14 @@ module DwcAgent
170
172
  \b(?i:then(\s+by)?)\s+ | # "then (by)"
171
173
  \b(?i:veri?f?\.?\:?(\s+by)?|v(e|é)rifi(e|é)d?(\s+by)?)\s*\b | # "verif."
172
174
  \b(?i:via|from)\s*\b # "via", "from"
173
- }x
175
+ }x.freeze
174
176
 
175
177
  POST_STRIP_TIDY = %r{
176
178
  ^\s*[&,;.]\s* | # Leading whitespace, then a single &, comma, ; or . and any whitespace after it
177
179
  [\[\]] | # Any standalone square brackets
178
180
  ^[`'".,!?]+ | # Leading repeated punctuation (` ' " . , ! ?)
179
181
  [`'",]+$ # Trailing repeated punctuation (` ' " ,)
180
- }x
182
+ }x.freeze
181
183
 
182
184
  CHAR_SUBS = {
183
185
  '"' => '\'',
@@ -219,7 +221,7 @@ module DwcAgent
219
221
  'prof.' => 'Prof. ',
220
222
  ' .;' => '. ;',
221
223
  ', &' => ' &'
222
- }
224
+ }.freeze
223
225
 
224
226
  COMPLEX_SEPARATORS = {
225
227
  "^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]+)$" => "\\2 \\3 \\1",
@@ -228,7 +230,8 @@ module DwcAgent
228
230
  "^(\\S{4,},\\s+(?:\\S\\.\\s*)+)\\s+(\\S{4,},\\s+(?:\\S\.\\s*)+)$" => "\\1 | \\2",
229
231
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
230
232
  "^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?)+)$" => "\\1, \\2",
231
- "([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2",
233
+ "^([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)(?i:and|&|et|e|,|;)\s*([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2 | \\7 \\5, \\6",
234
+ "^([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2",
232
235
  "^((?i:[A-Z]\\.\\s?)+)\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?)+)\\s+([[:alpha:]’`'-]{2,})\\s+([[:alpha:]’`'-]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
233
236
  "^((?i:[A-Z]\\.\\s?)+)\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?)+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
234
237
  "^([A-Z]{1,3})\\s+(?:and|&|et|e)\\s+([A-Z]{1,3})\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
@@ -236,7 +239,7 @@ module DwcAgent
236
239
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
237
240
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4",
238
241
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4 | \\5"
239
- }
242
+ }.freeze
240
243
 
241
244
  BLACKLIST = %r{
242
245
  (?i:
@@ -311,9 +314,9 @@ module DwcAgent
311
314
  workshop | garden | farm | jardin | public |
312
315
  ^de$
313
316
  )
314
- }x
317
+ }x.freeze
315
318
 
316
- FAMILY_GREENLIST = [
319
+ FAMILY_GREENLIST = Set.new([
317
320
  "Ng",
318
321
  "Srb",
319
322
  "Srp",
@@ -323,9 +326,9 @@ module DwcAgent
323
326
  "Smrt",
324
327
  "Krc",
325
328
  "Krč"
326
- ]
329
+ ]).freeze
327
330
 
328
- FAMILY_BLACKLIST = [
331
+ FAMILY_BLACKLIST = Set.new([
329
332
  "a b",
330
333
  "a e",
331
334
  "a g",
@@ -390,20 +393,20 @@ module DwcAgent
390
393
  "zw",
391
394
  "zz",
392
395
  "z-"
393
- ]
396
+ ]).freeze
394
397
 
395
- GIVEN_BLACKLIST = [
398
+ GIVEN_BLACKLIST = Set.new([
396
399
  "not any",
397
400
  "has not"
398
- ]
401
+ ]).freeze
399
402
 
400
- TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|cmdr|lt|sgt|cpl|pvt|proff?|dr|dra\.|drª|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|doct(eu|o)r|father|cantor|vicar|père|pastor|profa\.?|profª|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
403
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|cmdr|lt|sgt|cpl|pvt|proff?|dr|dra\.|drª|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|doct(eu|o)r|father|cantor|vicar|père|pastor|profa\.?|profª|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i.freeze
401
404
 
402
- APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
405
+ APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i.freeze
403
406
 
404
- SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|ESQ|esq|[IVX]{2,})(\.|\b)/
407
+ SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|ESQ|esq|[IVX]{2,})(\.|\b)/.freeze
405
408
 
406
- PARTICLES = [
409
+ PARTICLES = Set.new([
407
410
  "ap",
408
411
  "da",
409
412
  "de",
@@ -425,7 +428,7 @@ module DwcAgent
425
428
  "van de",
426
429
  "van der",
427
430
  "von der"
428
- ]
431
+ ]).freeze
429
432
 
430
433
  VOWELS = "aeiouàáâäǎæãåāèéêëěẽēėęìíîïǐĩīıįòóôöǒœøõōùúûüǔũūűů"
431
434
 
@@ -13,7 +13,7 @@ module DwcAgent
13
13
  subs_regex: Regexp.new(CHAR_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s),
14
14
  complex_separators_regex: COMPLEX_SEPARATORS.map{|k,v| [Regexp.new(k), v] },
15
15
  residual_terminators_regex: Regexp.new(SPLIT_BY.to_s + %r{\s*\z}.to_s)
16
- }
16
+ }.freeze
17
17
 
18
18
  class << self
19
19
  attr_reader :defaults
@@ -35,7 +35,7 @@ module DwcAgent
35
35
  # @param names [String] the name or names to be parsed
36
36
  # @return [Array] the list of parsed names
37
37
  def parse(name)
38
- return [] if name.nil? || name == ""
38
+ return [] if name.nil? || name.empty?
39
39
  name.gsub!(options[:strip_out_regex], ' ')
40
40
  name.gsub!(options[:tidy_remains_regex], '')
41
41
  name.gsub!(options[:subs_regex], CHAR_SUBS)
@@ -4,7 +4,7 @@ module DwcAgent
4
4
 
5
5
  MAJOR = 3
6
6
  MINOR = 4
7
- PATCH = 2
7
+ PATCH = 3
8
8
  BUILD = 0
9
9
 
10
10
  def self.version
@@ -12,7 +12,7 @@ module DwcAgent
12
12
  end
13
13
 
14
14
  def self.date
15
- '2025-09-09'
15
+ '2026-02-23'
16
16
  end
17
17
 
18
18
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.2.0
4
+ version: 3.4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2025-09-09 00:00:00.000000000 Z
10
+ date: 2026-02-23 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: namae
@@ -80,6 +79,20 @@ dependencies:
80
79
  - - "~>"
81
80
  - !ruby/object:Gem::Version
82
81
  version: '2'
82
+ - !ruby/object:Gem::Dependency
83
+ name: benchmark-ips
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '2'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '2'
83
96
  description: Parses the typically messy content in Darwin Core terms that contain
84
97
  people names
85
98
  email: davidpshorthouse@gmail.coms
@@ -102,7 +115,6 @@ homepage: https://github.com/bionomia/dwc_agent
102
115
  licenses:
103
116
  - MIT
104
117
  metadata: {}
105
- post_install_message:
106
118
  rdoc_options:
107
119
  - "--encoding"
108
120
  - UTF-8
@@ -119,8 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
131
  - !ruby/object:Gem::Version
120
132
  version: '0'
121
133
  requirements: []
122
- rubygems_version: 3.5.4
123
- signing_key:
134
+ rubygems_version: 3.6.9
124
135
  specification_version: 4
125
136
  summary: Parse Darwin Core agent terms such as recordedBy and identifiedBy
126
137
  test_files: []