dwc_agent 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +2 -2
- data/lib/dwc_agent/constants.rb +9 -6
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4ea46a2cca2719aebba4a99251aaf02f6d2fb36f21f3e6ea28b76584fc7345a
|
4
|
+
data.tar.gz: 72cde7bbdf5c8f93923710f887f299a4618e32c5d129e8cac0bbcc1a285492fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77c1027c302f5b853641266a833d197c1d81045ffad0ca0a2b3f4161d1fc4443fff944e6493d9c1089e710ac5aaff33bd0075698c7b70ff0ddf90d79602c8442
|
7
|
+
data.tar.gz: 9b92955bf9421e4b5f7a0c5cc5fa04265a393f04de6d7f23217cea739732b81bd8f9fb9042c02db20fe13118f2c9f3b75e1a374787b83d8e0ee1350e3e2a6c7b
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -18,7 +18,7 @@ module DwcAgent
|
|
18
18
|
def clean(parsed_namae)
|
19
19
|
blank_name = { given: nil, family: nil }
|
20
20
|
|
21
|
-
if parsed_namae.family && FAMILY_BLACKLIST.
|
21
|
+
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
22
22
|
return blank_name
|
23
23
|
end
|
24
24
|
|
@@ -111,7 +111,7 @@ module DwcAgent
|
|
111
111
|
return blank_name
|
112
112
|
end
|
113
113
|
|
114
|
-
if !family.nil? && FAMILY_BLACKLIST.
|
114
|
+
if !family.nil? && FAMILY_BLACKLIST.any?{ |s| s.casecmp(family) == 0 }
|
115
115
|
return blank_name
|
116
116
|
end
|
117
117
|
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -20,7 +20,6 @@ module DwcAgent
|
|
20
20
|
\b[,;]?\s*(?i:person\s*string)\b|
|
21
21
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
22
22
|
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
23
|
-
(?i:no\s+(data|disponible))|
|
24
23
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
25
24
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
26
25
|
May|Jun|Jul|Aug|Sept?|
|
@@ -78,7 +77,7 @@ module DwcAgent
|
|
78
77
|
\b\s*\(?(?i:(fe)?male)\)?\s*\b|
|
79
78
|
\b(?i:to\s+(sub)?spp?)\.?|
|
80
79
|
(?i:nom\.?\s+rev\.?)|
|
81
|
-
FNA|DAO|HUH|FDNMB|
|
80
|
+
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
|
82
81
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
83
82
|
(?i:university|museum|exhibits?)|
|
84
83
|
(?i:uqam)|
|
@@ -165,6 +164,7 @@ module DwcAgent
|
|
165
164
|
BLACKLIST = %r{
|
166
165
|
(?i:abundant)|
|
167
166
|
(?i:adult|juvenile)|
|
167
|
+
(?i:administra(d|t)or)|
|
168
168
|
(?i:anon)|
|
169
169
|
(?i:australian?)|
|
170
170
|
(?i:average)|
|
@@ -173,9 +173,10 @@ module DwcAgent
|
|
173
173
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
174
174
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
175
175
|
(?i:carex|salix)|
|
176
|
-
(
|
176
|
+
(?i:catalog(ue)?)|
|
177
177
|
(?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
|
178
178
|
\b\s*(?i:help)\s*\b|
|
179
|
+
(?i:data\s+not\s+captured)|
|
179
180
|
(?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
|
180
181
|
(?i:desconocido)|
|
181
182
|
(?i:exc?s?icc?at(a|i))|
|
@@ -193,7 +194,9 @@ module DwcAgent
|
|
193
194
|
(?i:univ\.)|
|
194
195
|
(?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
195
196
|
(?i:non\s+pr(é|e)cis(é|e))|
|
196
|
-
(?i:
|
197
|
+
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
198
|
+
(?i:not?\s+(entered|stated))|
|
199
|
+
(?i:nomenclatur(e|al)\s+adjustment)|
|
197
200
|
(?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
|
198
201
|
(?i:recreation|culture)|
|
199
202
|
(?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
|
@@ -223,8 +226,8 @@ module DwcAgent
|
|
223
226
|
"van",
|
224
227
|
"von",
|
225
228
|
"the",
|
226
|
-
"
|
227
|
-
"
|
229
|
+
"of",
|
230
|
+
"curators"
|
228
231
|
]
|
229
232
|
|
230
233
|
TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|