dwc_agent 1.4.3 → 1.4.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee3ede60926c849a6db7094f63a9d03d07f3e9d13fd74428841812f8970aa681
4
- data.tar.gz: 7075c82cd35834f8dad00d57922e46f7ab0eb4f086eb4c0ad40b02c5726ca001
3
+ metadata.gz: fa4fb87ea91fd1f0e67278590192a55bfc7f1e8d6f4b8dc92c1f9f5eb508e44c
4
+ data.tar.gz: a89b51ea705885713ef8615c67e1ea10798abfe593b5646b4de9fb8e1b478762
5
5
  SHA512:
6
- metadata.gz: 9e7b655e50ec2d744d74ad44a30b35d75e76b7e0160a35ee7e6b295a03dbc343a012a702e3614c10516bf1282617ec00a0f472e2cc1aefc9d3decc9b4494a946
7
- data.tar.gz: a28bd967f7df5afaf5e71a01f3902c0bab33dc735bc0861ceb137bad2d81981ec9ae6768497afff267b986bf653f9ba1fb8091221720964866b3352f8d8e2aae
6
+ metadata.gz: d676b64441d0097bd6272e2cd694c5754c4bdaed8fd0f523ecbe28748c8ccedffd9dd1c0430f5ad25cf48c02705b8131a9ce1021c07965da0791ae5f62e36c8a
7
+ data.tar.gz: 00effae1b438e6d97ef8da8383aa407985876d148b5b30a51ca98d2befa0dc8ac4a8c69bb389f75dd08c147853522490a406470dd8c4aa20d976238cf2cb4d82
@@ -1,7 +1,7 @@
1
1
  module DwcAgent
2
2
  STRIP_OUT = %r{
3
3
  ^[\[{(]|
4
- [\]})]$|
4
+ [\]})]\??$|
5
5
  \s*?\d+\.\d+|
6
6
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
7
7
  \b[,;]?\s*(?i:et\.?\s+al)\.?|
@@ -13,7 +13,7 @@ module DwcAgent
13
13
  \b[,;]?\s*(?i:unkn?own)\b|
14
14
  \b[,;]?\s*(?i:n/a)\b|
15
15
  \b[,;]?\s*(?i:ann?onymous)\b|
16
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|illegible|scripsit)\)?\b|
16
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit)\)?\b|
17
17
  \b[,;]?\s*(?i:importer|gift)\:?\b|
18
18
  \b[,;]?\s*(?i:frère|frere|père|pere|soeur|sister|bro)\.?(\b|\z)|
19
19
  \b[,;]?\s*(?i:string)\b|
@@ -80,11 +80,12 @@ module DwcAgent
80
80
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
81
81
  \b(?i:to\s+(sub)?spp?)\.?|
82
82
  (?i:nom\.?\s+rev\.?)|
83
- FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
83
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|
84
84
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
85
85
  (?i:university|museum|exhibits?)|
86
86
  (?i:uqam)|
87
87
  (?i:sem\s+(colec?tor|data))|
88
+ (?i:no\s+coll\.?(ector)?)|
88
89
  \b[,;]\s+\d+\z|
89
90
  ["!@?]|
90
91
  [,]?\d+|
@@ -136,8 +137,6 @@ module DwcAgent
136
137
  '|' => ' | ',
137
138
  '(' => ' ',
138
139
  ')' => ' ',
139
- '[' => ' ',
140
- ']' => ' ',
141
140
  '?' => '',
142
141
  '!' => '',
143
142
  '=' => '',
@@ -196,7 +195,7 @@ module DwcAgent
196
195
  (?i:geographic)|
197
196
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
198
197
  (?i:univ\.)|
199
- (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
198
+ (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
200
199
  (?i:non\s+pr(é|e)cis(é|e))|
201
200
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
202
201
  (?i:not?\s+(entered|stated))|
@@ -211,7 +210,7 @@ module DwcAgent
211
210
  (?i:sequence\s+data)|
212
211
  (?i:size|large|colou?r)\s+|
213
212
  (?i:skeleton)|
214
- (?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|exchange|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
213
+ (?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
215
214
  (?i:submersible)|
216
215
  (?i:synonymy?)|(topo|syn|holo)type|
217
216
  (?i:systematic|perspective)|
@@ -230,21 +229,28 @@ module DwcAgent
230
229
  }x
231
230
 
232
231
  FAMILY_BLACKLIST = [
232
+ "da",
233
+ "de'",
234
+ "del",
233
235
  "der",
236
+ "du",
237
+ "el",
234
238
  "van",
235
239
  "von",
236
240
  "the",
237
241
  "of",
238
- "curators",
239
- "nomenclatural",
240
242
  "adjustment",
241
243
  "available",
242
- "data",
243
- "orig",
244
- "science",
245
244
  "catalogue",
245
+ "curators",
246
+ "data",
247
+ "determination",
248
+ "dissection",
246
249
  "entered",
247
- "registration"
250
+ "nomenclatural",
251
+ "orig",
252
+ "registration",
253
+ "science"
248
254
  ]
249
255
 
250
256
  GIVEN_BLACKLIST = [
@@ -17,7 +17,7 @@ module DwcAgent
17
17
 
18
18
  @strip_out_regex = Regexp.new STRIP_OUT.to_s
19
19
  @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
20
- @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join('\\')].to_s
20
+ @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
21
21
  @phrase_subs_regex = Regexp.new (PHRASE_SUBS.keys.join('|')).to_s
22
22
  @complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
23
23
  @add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
@@ -30,6 +30,7 @@ module DwcAgent
30
30
  def parse(name)
31
31
  return [] if name.nil? || name == ""
32
32
  name.gsub!(@strip_out_regex, ' ')
33
+ name.gsub!(/\[|\]/, '')
33
34
  name.gsub!(@char_subs_regex, CHAR_SUBS)
34
35
  name.gsub!(@phrase_subs_regex, PHRASE_SUBS)
35
36
  name.gsub!(@add_separators_regex, '\1 \2')
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 1
5
5
  MINOR = 4
6
- PATCH = 3
6
+ PATCH = 4
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-03 00:00:00.000000000 Z
11
+ date: 2020-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae