dwc_agent 0.3.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfbc3ad3469ce14a1496befc2ffae9c25a17f2526dc02f1e8e7c14c7e7b431f0
4
- data.tar.gz: ddeb0f4eeb81450c1f8ec0d2233e037340650570a14f77b2fa08b5d0e65ee9ac
3
+ metadata.gz: c4ea46a2cca2719aebba4a99251aaf02f6d2fb36f21f3e6ea28b76584fc7345a
4
+ data.tar.gz: 72cde7bbdf5c8f93923710f887f299a4618e32c5d129e8cac0bbcc1a285492fd
5
5
  SHA512:
6
- metadata.gz: a8add8048e7c1ef15d9974d198cb9e74c8dd1ec210530fbe4088b4dedd993de4e94cf01ed5daf58e21bb353eaf6d8192f377fb0ba93bb01371112bdf491453de
7
- data.tar.gz: efca1ecff92dee36596d0ae655678e892b2729c47333a6af25c0e68e0ad525b4e69ecd573524498eeb5ca4e87b96f5e6e4133a5ce2200f43e5b01def0c0ca19b
6
+ metadata.gz: 77c1027c302f5b853641266a833d197c1d81045ffad0ca0a2b3f4161d1fc4443fff944e6493d9c1089e710ac5aaff33bd0075698c7b70ff0ddf90d79602c8442
7
+ data.tar.gz: 9b92955bf9421e4b5f7a0c5cc5fa04265a393f04de6d7f23217cea739732b81bd8f9fb9042c02db20fe13118f2c9f3b75e1a374787b83d8e0ee1350e3e2a6c7b
@@ -18,18 +18,26 @@ module DwcAgent
18
18
  def clean(parsed_namae)
19
19
  blank_name = { given: nil, family: nil }
20
20
 
21
+ if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
22
+ return blank_name
23
+ end
24
+
21
25
  if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
22
26
  return blank_name
23
27
  end
28
+
24
29
  if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
25
30
  return blank_name
26
31
  end
32
+
27
33
  if parsed_namae.given && parsed_namae.given.length > 25
28
34
  return blank_name
29
35
  end
36
+
30
37
  if parsed_namae.given && parsed_namae.given.count('.') >= 3 && /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
31
38
  return blank_name
32
39
  end
40
+
33
41
  if parsed_namae.display_order =~ BLACKLIST
34
42
  return blank_name
35
43
  end
@@ -103,6 +111,10 @@ module DwcAgent
103
111
  return blank_name
104
112
  end
105
113
 
114
+ if !family.nil? && FAMILY_BLACKLIST.any?{ |s| s.casecmp(family) == 0 }
115
+ return blank_name
116
+ end
117
+
106
118
  { given: given, family: family }
107
119
  end
108
120
 
@@ -20,7 +20,6 @@ module DwcAgent
20
20
  \b[,;]?\s*(?i:person\s*string)\b|
21
21
  \b[,;]?\s*(?i:colls)\.(\b|\z)|
22
22
  \b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
23
- (?i:no\s+(data|disponible))|
24
23
  \b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
25
24
  [,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
26
25
  May|Jun|Jul|Aug|Sept?|
@@ -78,7 +77,7 @@ module DwcAgent
78
77
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
79
78
  \b(?i:to\s+(sub)?spp?)\.?|
80
79
  (?i:nom\.?\s+rev\.?)|
81
- FNA|DAO|HUH|FDNMB|
80
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
82
81
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
83
82
  (?i:university|museum|exhibits?)|
84
83
  (?i:uqam)|
@@ -147,7 +146,8 @@ module DwcAgent
147
146
  '{' => '',
148
147
  '}' => '',
149
148
  '@' => '',
150
- '%' => ''
149
+ '%' => '',
150
+ '\\' => ''
151
151
  }
152
152
 
153
153
  PHRASE_SUBS = {
@@ -164,6 +164,7 @@ module DwcAgent
164
164
  BLACKLIST = %r{
165
165
  (?i:abundant)|
166
166
  (?i:adult|juvenile)|
167
+ (?i:administra(d|t)or)|
167
168
  (?i:anon)|
168
169
  (?i:australian?)|
169
170
  (?i:average)|
@@ -172,9 +173,10 @@ module DwcAgent
172
173
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
173
174
  (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
174
175
  (?i:carex|salix)|
175
- (?:catalog)|
176
+ (?i:catalog(ue)?)|
176
177
  (?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
177
178
  \b\s*(?i:help)\s*\b|
179
+ (?i:data\s+not\s+captured)|
178
180
  (?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
179
181
  (?i:desconocido)|
180
182
  (?i:exc?s?icc?at(a|i))|
@@ -192,6 +194,9 @@ module DwcAgent
192
194
  (?i:univ\.)|
193
195
  (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
194
196
  (?i:non\s+pr(é|e)cis(é|e))|
197
+ (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
198
+ (?i:not?\s+(entered|stated))|
199
+ (?i:nomenclatur(e|al)\s+adjustment)|
195
200
  (?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
196
201
  (?i:recreation|culture)|
197
202
  (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
@@ -216,6 +221,15 @@ module DwcAgent
216
221
  ^\s*?de\s*?$
217
222
  }x
218
223
 
219
- TITLE = /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
224
+ FAMILY_BLACKLIST = [
225
+ "der",
226
+ "van",
227
+ "von",
228
+ "the",
229
+ "of",
230
+ "curators"
231
+ ]
232
+
233
+ TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
220
234
 
221
235
  end
@@ -2,8 +2,8 @@ module DwcAgent
2
2
  class Version
3
3
 
4
4
  MAJOR = 0
5
- MINOR = 3
6
- PATCH = 2
5
+ MINOR = 4
6
+ PATCH = 3
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-21 00:00:00.000000000 Z
11
+ date: 2019-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae