dwc_agent 3.3.0.0 → 3.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40a0a33de602297e5e87e3059edbd0c88e0ad36bbb90f0803362698120f9e3b1
4
- data.tar.gz: 94705d02d7cb7a3ac1647f903d15db7140ece849008ba84f6d49f1bf678abf14
3
+ metadata.gz: a8c59f7eec17563af7a1e0ab3a931055d8181c7331f4ad4e569a61a4a05a07b8
4
+ data.tar.gz: 204d62a3fd1d5e003e455f271813d38691e00710a7e47b50e87bb60b7a4b7a90
5
5
  SHA512:
6
- metadata.gz: 1baeb5c1367e570139fca30a1181395030b12d09b1d4f12a026ad48e28ebaecee70b20d81b96b02f2600da269f2342281c8929facc70f0b9c571cafcfb273501
7
- data.tar.gz: bf7676d6ed221258ab6efe217a7b695cb619c54be8a8d14f16bb23a3f1bc9e0fa0a33474211e7f20e5a08b058e2ab4b57e4c5e1367be74d9719ad2502a035d77
6
+ metadata.gz: b1872d38cdc1983d090850a4d0eb313c84f9b5a5c753d4a1af39fc016a3306245224f23f3fb021e1543f59646f685ba21167e80aef30bebb414349a0f675929a
7
+ data.tar.gz: 5fb3deb9a940f9407abeda5d558a2275bcc4df8180f5966e26af6692d74c044b3af88f4732a26185e2362a35d42883d45d47f1555ce9fecc10d9f0a5c37c91c9
@@ -126,9 +126,8 @@ module DwcAgent
126
126
  end
127
127
 
128
128
  if parsed_namae.family.nil? &&
129
- !parsed_namae.given.nil? &&
130
- !parsed_namae.given.include?(".")
131
- parsed_namae.family = parsed_namae.given
129
+ !parsed_namae.given.nil?
130
+ parsed_namae.family = parsed_namae.given.delete_suffix(".")
132
131
  parsed_namae.given = nil
133
132
  end
134
133
 
@@ -10,7 +10,7 @@ module DwcAgent
10
10
  [,]?\s*\#*\s+\d+\-(?i:[A-Z]|\d)+\-?\d*[A-Za-z]*\z|
11
11
  \d*[A-Za-z]*\d*-\d*\z|
12
12
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
13
- [,;\s]{1,}(?:et\.?\s+al|&\s+al)l?\.?|
13
+ [,;\s]+(?:et\.?\s+al|&\s+al)l?\.?|
14
14
  \b[,;]?\s*(?i:etal)\.?|
15
15
  \b[,;]?\s*(?i:et.al)\.?|
16
16
  \b\s+(bis|ter)(\b|\z)|
@@ -32,7 +32,7 @@ module DwcAgent
32
32
  ^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
33
33
  \b[,;]?\s*(?i:colls)\.(\b|\z)|
34
34
  (?i:contactid)|
35
- ^(?i:dupl)[.,]{1,}|
35
+ ^(?i:dupl)[.,]+|
36
36
  \b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
37
37
  [,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
38
38
  May|Jun|Jul|Aug|Sept?|
@@ -141,42 +141,42 @@ module DwcAgent
141
141
  }x
142
142
 
143
143
  SPLIT_BY = %r{
144
- [;,]{2,}|
145
- [–|ǀ∣|│&+\/;:]|
146
- \s+-\s+|
147
- \s+a\.\s+|
148
- \b(con|e|y|i|en|et|or|per|for|und)\s*\b|
149
- \b(?i:and|with)\s*\b|
150
- \b(?i:annotated(\s+by)?)\s*\b|
151
- \b(?i:coll\.)\s*\b|
152
- \b(?i:comm\.?)\s*\b|
153
- \b(?i:communicate?d(\s+to)?)\s*\b|
154
- \b(?i:conf\.?(\s+by)?|confirmed(\s+by)?)\s*\b|
155
- \b(?i:confirmada)(\s+por)?\s*\b|
156
- \b(?i:checked?(\s+by)?)\s*\b|
157
- \b(?i:det\.?(\s+by)?)\s*\b|
158
- \b(?i:(donated)?\s*by)\s+|
159
- \b(?i:dupl?[.,]?(\s+by)?|duplicate(\s+by)?)\s*\b|
160
- \b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
161
- \b(?i:in?dentified(\s+by)?)\s*\b|
162
- \b(?i:in\s+coll\.?\s*\b)|
163
- \b(?i:in\s+part(\s+by)?)\s*\b|
164
- \b(?i:och)\s*\b|
165
- \b(?i:prep\.?\s+(?i:by)?)\s*\b|
166
- \b(?i:purchased?)(\s+by)?\s*\b|
167
- \b(?i:redet\.?(\s+by?)?)\s*\b|
168
- \b(?i:reidentified(\s+by)?)\s*\b|
169
- \b(?i:stet)\s*\b|
170
- \b(?i:then(\s+by)?)\s+|
171
- \b(?i:veri?f?\.?\:?(\s+by)?|v(e|é)rifi(e|é)d?(\s+by)?)\s*\b|
172
- \b(?i:via|from)\s*\b
144
+ [;,]{2,} | # Multiple semicolons or commas
145
+ [–|ǀ∣|│&+\/;:] | # Various separators
146
+ \s+-\s+ | # Dash surrounded by spaces
147
+ \s+a\.\s+ | # "a." surrounded by spaces
148
+ \b(con|e|y|i|en|et|or|per|for|und)\s*\b | # Short conjunctions or prepositions
149
+ \b(?i:and|with)\s*\b | # Case-insensitive "and", "with"
150
+ \b(?i:annotated(\s+by)?)\s*\b | # "annotated (by)"
151
+ \b(?i:coll\.)\s*\b | # "coll."
152
+ \b(?i:comm\.?)\s*\b | # "comm."
153
+ \b(?i:communicate?d(\s+to)?)\s*\b | # "communicated (to)"
154
+ \b(?i:conf\.?(\s+by)?|confirmed(\s+by)?)\s*\b | # "conf.", "confirmed (by)"
155
+ \b(?i:confirmada)(\s+por)?\s*\b | # "confirmada (por)"
156
+ \b(?i:checked?(\s+by)?)\s*\b | # "checked (by)"
157
+ \b(?i:det\.?(\s+by)?)\s*\b | # "det."
158
+ \b(?i:(donated)?\s*by)\s+ | # "donated by"
159
+ \b(?i:dupl?[.,]?(\s+by)?|duplicate(\s+by)?)\s*\b | # "dupl.", "duplicate"
160
+ \b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b | # "ex.", "examined (by)"
161
+ \b(?i:in?dentified(\s+by)?)\s*\b | # "identified (by)"
162
+ \b(?i:in\s+coll\.?\s*\b) | # "in coll."
163
+ \b(?i:in\s+part(\s+by)?)\s*\b | # "in part (by)"
164
+ \b(?i:och)\s*\b | # "och"
165
+ \b(?i:prep\.?\s+(?i:by)?)\s*\b | # "prep. by"
166
+ \b(?i:purchased?)(\s+by)?\s*\b | # "purchased (by)"
167
+ \b(?i:redet\.?(\s+by?)?)\s*\b | # "redet."
168
+ \b(?i:reidentified(\s+by)?)\s*\b | # "reidentified"
169
+ \b(?i:stet)\s*\b | # "stet"
170
+ \b(?i:then(\s+by)?)\s+ | # "then (by)"
171
+ \b(?i:veri?f?\.?\:?(\s+by)?|v(e|é)rifi(e|é)d?(\s+by)?)\s*\b | # "verif."
172
+ \b(?i:via|from)\s*\b # "via", "from"
173
173
  }x
174
174
 
175
175
  POST_STRIP_TIDY = %r{
176
- ^\s*[&,;.]\s*|
177
- [\[\]]|
178
- ^[`'".,!?]{1,}|
179
- [`'",]{1,}$
176
+ ^\s*[&,;.]\s* | # Leading whitespace followed by any combination of &, ;, or .
177
+ [\[\]] | # Any standalone square brackets
178
+ ^[`'".,!?]+ | # Leading repeated punctuation (` ' " . , ! ?)
179
+ [`'",]+$ # Trailing repeated punctuation (` ' ")
180
180
  }x
181
181
 
182
182
  CHAR_SUBS = {
@@ -202,10 +202,7 @@ module DwcAgent
202
202
  '%' => '',
203
203
  '\\' => '',
204
204
  '´' => '\'',
205
- '+' => ' | '
206
- }
207
-
208
- PHRASE_SUBS = {
205
+ '+' => ' | ',
209
206
  ', ph.d.' => ' Ph.D.',
210
207
  ', Ph.D.' => ' Ph.D.',
211
208
  ', bro.' => ' Bro.',
@@ -224,94 +221,96 @@ module DwcAgent
224
221
  ', &' => ' &'
225
222
  }
226
223
 
227
- SEPARATORS = {
228
- "^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
224
+ COMPLEX_SEPARATORS = {
225
+ "^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]+)$" => "\\2 \\3 \\1",
229
226
  "^(Mrs?\\.?)\\s+&\\s+(Mrs?\\.?)\\s+(.*)$" => "\\1 \\3 | \\2 \\3",
230
- "^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
231
- "^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
227
+ "^([A-Z]{1}\\.\\s*[[:alpha:]]+),\\s*?([A-Z.]+)$" => "\\1 \\2",
228
+ "^(\\S{4,},\\s+(?:\\S\\.\\s*)+)\\s+(\\S{4,},\\s+(?:\\S\.\\s*)+)$" => "\\1 | \\2",
232
229
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
233
- "^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
230
+ "^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?)+)$" => "\\1, \\2",
234
231
  "([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2",
235
- "^((?i:[A-Z]\\.\\s?){1,})\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?){1,})\\s+([[:alpha:]’`'-]{2,})\\s+([[:alpha:]’`'-]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
236
- "^((?i:[A-Z]\\.\\s?){1,})\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?){1,})\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
232
+ "^((?i:[A-Z]\\.\\s?)+)\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?)+)\\s+([[:alpha:]’`'-]{2,})\\s+([[:alpha:]’`'-]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
233
+ "^((?i:[A-Z]\\.\\s?)+)\\s?(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?)+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
237
234
  "^([A-Z]{1,3})\\s+(?:and|&|et|e)\\s+([A-Z]{1,3})\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
238
- "^((?i:[A-Z]\\.\\s?){1,}),\\s+([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?){1,})\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\4 | \\2 \\4 | \\3 \\4 | \\5",
235
+ "^((?i:[A-Z]\\.\\s?)+),\\s+([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+((?i:[A-Z]\\.\\s?)+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\4 | \\2 \\4 | \\3 \\4 | \\5",
239
236
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
240
237
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4",
241
238
  "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4 | \\5"
242
239
  }
243
240
 
244
241
  BLACKLIST = %r{
245
- (?i:abundant)|
246
- (?i:adult|juvenile)|
247
- (?i:administra(d|t)or)|
248
- ^(?i:anon)$|
249
- (?i:australian?)|
250
- (?i:average)|
251
- (?i:believe|unclear|ill?egible|suggested|(dis)?agrees?)|approach|
252
- \b\s*(?i:none)\s*\b|
253
- (?i:barcod)|
254
- (?i:BgWd)|
255
- (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
256
- (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
257
- (?i:carex|salix)|
258
- (?i:catalog(ue)?)|
259
- (?i:conservator)|
260
- (?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
261
- \b\s*(?i:help)\s*\b|
262
- (?i:data\s+not\s+captured)|
263
- (?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
264
- (?i:desconocido)|
265
- (?i:exc?s?icc?at(a|i))|
266
- (?i:evidence)|
267
- (?i:exporter)|
268
- (?i:foundation)|
269
- (?i:ichthyology)|
270
- (?i:inconn?u)|
271
- (?i:internation|gou?vern|ministry|extension|unit|district|provincial|na(c|t)ional|military|region|environ|natur(e|al)|naturelles|division|program|direction|national)|
272
- (?i:label)|
273
- (?i:o?\.?m\.?n\.?r\.?)|
274
- (?i:measurement)|
275
- (?i:ent(o|y)mology)|
276
- (?i:malacology)|
277
- (?i:geographic)|
278
- (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
279
- (?i:univ\.)|
280
- \b\s*(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|personnel|staff|family|captain|friends|assistant|worker|gamekeeper)\s*\b|
281
- (?i:non\s+pr(é|e)cis(é|e))|
282
- (?i:no\s+consta)|
283
- (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
284
- (?i:not?\s+(entered|stated))|
285
- (?i:nomenclatur(e|al)\s+adjustment)|
286
- (?i:not\s+available)|
287
- (?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
288
- (?i:popa\s+observers?)|
289
- (?i:recreation|culture)|
290
- (?i:renseigné)|
291
- (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
292
- (?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|commission|consortium|council|club|exposit|alliance|protective|circle)|
293
- ^(?i:class)\s*\b|
294
- (?i:commercial|control|product)|
295
- ^(?i:company)\s*\b|
296
- (?i:sequence\s+data)|
297
- (?i:size|large|colou?r)\s+|
298
- (?i:skeleton)|
299
- (?i:survey|assessment|station|monitor|stn\.|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
300
- ^(?i:index)\s*\b|
301
- (?i:submersible)|
302
- (?i:synonymy?)|
303
- (?i:systematic|perspective)|
304
- ^\s*(?i:off|too|the)\s*\b|
305
- (?i:taxiderm(ies|y))|
306
- (?i:though)|
307
- (?i:texas\s+instruments?)\s*?(for)?|
308
- (?:tropical)|
309
- (?i:toward|seen\s+at)|
310
- (?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
311
- (?i:urn\:)|
312
- (?i:usda|ucla)|
313
- (?i:workshop|garden|farm|jardin|public)|
314
- ^\s*?de\s*?$
242
+ (?i:
243
+ abundant |
244
+ adult | juvenile |
245
+ administra(?:d|t)or |
246
+ ^anon$ |
247
+ australian? |
248
+ average |
249
+ believe | unclear | ill?egible | suggested | (dis)?agrees? | approach |
250
+ \bnone\b |
251
+ barcod |
252
+ bgwd |
253
+ (biolog|botan|zoo|ecolog|mycol|(?:in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture) |
254
+ (bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america) |
255
+ carex | salix |
256
+ catalog(?:ue)? |
257
+ conservator |
258
+ (herbarium|herbier|collection|collected|publication|specimen|species|describe|an(?:a|o)morph|isolated|recorded|inspection|define|status|lighthouse) |
259
+ \bhelp\b |
260
+ data\s+not\s+captured |
261
+ (description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect) |
262
+ desconocido |
263
+ exc(?:s?icc?at(?:a|i)) |
264
+ evidence |
265
+ exporter |
266
+ foundation |
267
+ ichthyology |
268
+ inconn?u |
269
+ (internation|gou?vern|ministry|extension|unit|district|provincial|na(?:c|t)ional|military|region|environ|natur(?:e|al)|naturelles|division|program|direction) |
270
+ label |
271
+ o\.?m\.?n\.?r\.? |
272
+ measurement |
273
+ ent(?:o|y)mology |
274
+ malacology |
275
+ geographic |
276
+ (mus(?:eum|ée)|universit(?:y|é|e|at)|college|institute?|acad(?:e|é)m|school|écol(?:e|iers?)|laboratoi?r|project|polytech|dep(?:t|artment)|research|clinic|hospital|cientifica|sanctuary|safari) |
277
+ univ\. |
278
+ \b(graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(?:a|e)n|police|taxonomist|consultant|participant(?:es)?|team|(?:é|e)quipe|memb(?:er|re)|crew|group|personnel|staff|family|captain|friends|assistant|worker|gamekeeper)\b |
279
+ non\s+pr(?:é|e)cis(?:é|e) |
280
+ no\s+consta |
281
+ no\s+(agent\s+)?(?:data|disponible)(?:\s+available)? |
282
+ not?\s+(entered|stated) |
283
+ nomenclatur(?:e|al)\s+adjustment |
284
+ not\s+available |
285
+ (ontario|qu(?:e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?) |
286
+ popa\s+observers? |
287
+ recreation | culture |
288
+ renseigné |
289
+ (shaped|dark|pale|areas|phase|spotting|interior|between|closer) |
290
+ soci(?:e|é)t(?:y|é) | cent(?:er|re) | community | history | conservation | conference | assoc | commission | consortium | council | club | exposit | alliance | protective | circle |
291
+ ^class\b |
292
+ commercial | control | product |
293
+ ^company\b |
294
+ sequence\s+data |
295
+ size | large | colou?r |
296
+ skeleton |
297
+ survey | assessment | station | monitor | stn\. | project | engine | (e|é)x?chang(?:e|é)s? | ex(?:c|k)urs(?:e|o|ó)n? | exped\.? | exp(?:e|i)di(?:c|t)i(?:e|o|ó)n? | experiment | explora(?:d|t) | festival | generation | inventory | marine | service |
298
+ ^index\b |
299
+ submersible |
300
+ synonymy? |
301
+ systematic | perspective |
302
+ ^(?:off|too|the)\b |
303
+ taxiderm(?:ies|y) |
304
+ though |
305
+ texas\s+instruments?(?:\s+for)? |
306
+ tropical |
307
+ toward | seen\s+at |
308
+ unidentified | unspecified | unk?nown? | unnamed | unread | unmistak | no agent |
309
+ urn: |
310
+ usda | ucla |
311
+ workshop | garden | farm | jardin | public |
312
+ ^de$
313
+ )
315
314
  }x
316
315
 
317
316
  FAMILY_GREENLIST = [
@@ -402,7 +401,7 @@ module DwcAgent
402
401
 
403
402
  APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
404
403
 
405
- SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/
404
+ SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|ESQ|esq|[IVX]{2,})(\.|\b)/
406
405
 
407
406
  PARTICLES = [
408
407
  "ap",
@@ -10,10 +10,9 @@ module DwcAgent
10
10
  suffix: SUFFIX,
11
11
  strip_out_regex: Regexp.new(STRIP_OUT.to_s),
12
12
  tidy_remains_regex: Regexp.new(POST_STRIP_TIDY.to_s),
13
- char_subs_regex: Regexp.new([CHAR_SUBS.keys.join].to_s),
14
- phrase_subs_regex: Regexp.new(PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s),
15
- residual_terminators_regex: Regexp.new(SPLIT_BY.to_s + %r{\s*\z}.to_s),
16
- separators: SEPARATORS.map{|k,v| [ Regexp.new(k), v] }
13
+ subs_regex: Regexp.new(CHAR_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s),
14
+ complex_separators_regex: COMPLEX_SEPARATORS.map{|k,v| [Regexp.new(k), v] },
15
+ residual_terminators_regex: Regexp.new(SPLIT_BY.to_s + %r{\s*\z}.to_s)
17
16
  }
18
17
 
19
18
  class << self
@@ -39,8 +38,8 @@ module DwcAgent
39
38
  return [] if name.nil? || name == ""
40
39
  name.gsub!(options[:strip_out_regex], ' ')
41
40
  name.gsub!(options[:tidy_remains_regex], '')
42
- name.gsub!(Regexp.union(options[:char_subs_regex], options[:phrase_subs_regex]), CHAR_SUBS.merge(PHRASE_SUBS))
43
- options[:separators].each{|k| name.gsub!(k[0], k[1])}
41
+ name.gsub!(options[:subs_regex], CHAR_SUBS)
42
+ options[:complex_separators_regex].each{|k| name.gsub!(k[0], k[1])}
44
43
  name.gsub!(options[:residual_terminators_regex], '')
45
44
  name.squeeze!(' ')
46
45
  name.strip!
@@ -3,7 +3,7 @@ module DwcAgent
3
3
  class Version
4
4
 
5
5
  MAJOR = 3
6
- MINOR = 3
6
+ MINOR = 4
7
7
  PATCH = 0
8
8
  BUILD = 0
9
9
 
@@ -11,6 +11,10 @@ module DwcAgent
11
11
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
12
12
  end
13
13
 
14
+ def self.date
15
+ '2025-08-20'
16
+ end
17
+
14
18
  end
15
19
 
16
20
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0.0
4
+ version: 3.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-08 00:00:00.000000000 Z
11
+ date: 2025-08-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae