dwc_agent 1.4.3 → 1.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee3ede60926c849a6db7094f63a9d03d07f3e9d13fd74428841812f8970aa681
4
- data.tar.gz: 7075c82cd35834f8dad00d57922e46f7ab0eb4f086eb4c0ad40b02c5726ca001
3
+ metadata.gz: fa4fb87ea91fd1f0e67278590192a55bfc7f1e8d6f4b8dc92c1f9f5eb508e44c
4
+ data.tar.gz: a89b51ea705885713ef8615c67e1ea10798abfe593b5646b4de9fb8e1b478762
5
5
  SHA512:
6
- metadata.gz: 9e7b655e50ec2d744d74ad44a30b35d75e76b7e0160a35ee7e6b295a03dbc343a012a702e3614c10516bf1282617ec00a0f472e2cc1aefc9d3decc9b4494a946
7
- data.tar.gz: a28bd967f7df5afaf5e71a01f3902c0bab33dc735bc0861ceb137bad2d81981ec9ae6768497afff267b986bf653f9ba1fb8091221720964866b3352f8d8e2aae
6
+ metadata.gz: d676b64441d0097bd6272e2cd694c5754c4bdaed8fd0f523ecbe28748c8ccedffd9dd1c0430f5ad25cf48c02705b8131a9ce1021c07965da0791ae5f62e36c8a
7
+ data.tar.gz: 00effae1b438e6d97ef8da8383aa407985876d148b5b30a51ca98d2befa0dc8ac4a8c69bb389f75dd08c147853522490a406470dd8c4aa20d976238cf2cb4d82
@@ -1,7 +1,7 @@
1
1
  module DwcAgent
2
2
  STRIP_OUT = %r{
3
3
  ^[\[{(]|
4
- [\]})]$|
4
+ [\]})]\??$|
5
5
  \s*?\d+\.\d+|
6
6
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
7
7
  \b[,;]?\s*(?i:et\.?\s+al)\.?|
@@ -13,7 +13,7 @@ module DwcAgent
13
13
  \b[,;]?\s*(?i:unkn?own)\b|
14
14
  \b[,;]?\s*(?i:n/a)\b|
15
15
  \b[,;]?\s*(?i:ann?onymous)\b|
16
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|illegible|scripsit)\)?\b|
16
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit)\)?\b|
17
17
  \b[,;]?\s*(?i:importer|gift)\:?\b|
18
18
  \b[,;]?\s*(?i:frère|frere|père|pere|soeur|sister|bro)\.?(\b|\z)|
19
19
  \b[,;]?\s*(?i:string)\b|
@@ -80,11 +80,12 @@ module DwcAgent
80
80
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
81
81
  \b(?i:to\s+(sub)?spp?)\.?|
82
82
  (?i:nom\.?\s+rev\.?)|
83
- FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
83
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|
84
84
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
85
85
  (?i:university|museum|exhibits?)|
86
86
  (?i:uqam)|
87
87
  (?i:sem\s+(colec?tor|data))|
88
+ (?i:no\s+coll\.?(ector)?)|
88
89
  \b[,;]\s+\d+\z|
89
90
  ["!@?]|
90
91
  [,]?\d+|
@@ -136,8 +137,6 @@ module DwcAgent
136
137
  '|' => ' | ',
137
138
  '(' => ' ',
138
139
  ')' => ' ',
139
- '[' => ' ',
140
- ']' => ' ',
141
140
  '?' => '',
142
141
  '!' => '',
143
142
  '=' => '',
@@ -196,7 +195,7 @@ module DwcAgent
196
195
  (?i:geographic)|
197
196
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
198
197
  (?i:univ\.)|
199
- (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
198
+ (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
200
199
  (?i:non\s+pr(é|e)cis(é|e))|
201
200
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
202
201
  (?i:not?\s+(entered|stated))|
@@ -211,7 +210,7 @@ module DwcAgent
211
210
  (?i:sequence\s+data)|
212
211
  (?i:size|large|colou?r)\s+|
213
212
  (?i:skeleton)|
214
- (?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|exchange|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
213
+ (?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
215
214
  (?i:submersible)|
216
215
  (?i:synonymy?)|(topo|syn|holo)type|
217
216
  (?i:systematic|perspective)|
@@ -230,21 +229,28 @@ module DwcAgent
230
229
  }x
231
230
 
232
231
  FAMILY_BLACKLIST = [
232
+ "da",
233
+ "de'",
234
+ "del",
233
235
  "der",
236
+ "du",
237
+ "el",
234
238
  "van",
235
239
  "von",
236
240
  "the",
237
241
  "of",
238
- "curators",
239
- "nomenclatural",
240
242
  "adjustment",
241
243
  "available",
242
- "data",
243
- "orig",
244
- "science",
245
244
  "catalogue",
245
+ "curators",
246
+ "data",
247
+ "determination",
248
+ "dissection",
246
249
  "entered",
247
- "registration"
250
+ "nomenclatural",
251
+ "orig",
252
+ "registration",
253
+ "science"
248
254
  ]
249
255
 
250
256
  GIVEN_BLACKLIST = [
@@ -17,7 +17,7 @@ module DwcAgent
17
17
 
18
18
  @strip_out_regex = Regexp.new STRIP_OUT.to_s
19
19
  @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
20
- @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join('\\')].to_s
20
+ @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
21
21
  @phrase_subs_regex = Regexp.new (PHRASE_SUBS.keys.join('|')).to_s
22
22
  @complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
23
23
  @add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
@@ -30,6 +30,7 @@ module DwcAgent
30
30
  def parse(name)
31
31
  return [] if name.nil? || name == ""
32
32
  name.gsub!(@strip_out_regex, ' ')
33
+ name.gsub!(/\[|\]/, '')
33
34
  name.gsub!(@char_subs_regex, CHAR_SUBS)
34
35
  name.gsub!(@phrase_subs_regex, PHRASE_SUBS)
35
36
  name.gsub!(@add_separators_regex, '\1 \2')
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 1
5
5
  MINOR = 4
6
- PATCH = 3
6
+ PATCH = 4
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-03 00:00:00.000000000 Z
11
+ date: 2020-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae