dwc_agent 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +2 -2
- data/lib/dwc_agent/constants.rb +9 -6
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c4ea46a2cca2719aebba4a99251aaf02f6d2fb36f21f3e6ea28b76584fc7345a
|
|
4
|
+
data.tar.gz: 72cde7bbdf5c8f93923710f887f299a4618e32c5d129e8cac0bbcc1a285492fd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 77c1027c302f5b853641266a833d197c1d81045ffad0ca0a2b3f4161d1fc4443fff944e6493d9c1089e710ac5aaff33bd0075698c7b70ff0ddf90d79602c8442
|
|
7
|
+
data.tar.gz: 9b92955bf9421e4b5f7a0c5cc5fa04265a393f04de6d7f23217cea739732b81bd8f9fb9042c02db20fe13118f2c9f3b75e1a374787b83d8e0ee1350e3e2a6c7b
|
data/lib/dwc_agent/cleaner.rb
CHANGED
|
@@ -18,7 +18,7 @@ module DwcAgent
|
|
|
18
18
|
def clean(parsed_namae)
|
|
19
19
|
blank_name = { given: nil, family: nil }
|
|
20
20
|
|
|
21
|
-
if parsed_namae.family && FAMILY_BLACKLIST.
|
|
21
|
+
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
|
22
22
|
return blank_name
|
|
23
23
|
end
|
|
24
24
|
|
|
@@ -111,7 +111,7 @@ module DwcAgent
|
|
|
111
111
|
return blank_name
|
|
112
112
|
end
|
|
113
113
|
|
|
114
|
-
if !family.nil? && FAMILY_BLACKLIST.
|
|
114
|
+
if !family.nil? && FAMILY_BLACKLIST.any?{ |s| s.casecmp(family) == 0 }
|
|
115
115
|
return blank_name
|
|
116
116
|
end
|
|
117
117
|
|
data/lib/dwc_agent/constants.rb
CHANGED
|
@@ -20,7 +20,6 @@ module DwcAgent
|
|
|
20
20
|
\b[,;]?\s*(?i:person\s*string)\b|
|
|
21
21
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
|
22
22
|
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
|
23
|
-
(?i:no\s+(data|disponible))|
|
|
24
23
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
|
25
24
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
|
26
25
|
May|Jun|Jul|Aug|Sept?|
|
|
@@ -78,7 +77,7 @@ module DwcAgent
|
|
|
78
77
|
\b\s*\(?(?i:(fe)?male)\)?\s*\b|
|
|
79
78
|
\b(?i:to\s+(sub)?spp?)\.?|
|
|
80
79
|
(?i:nom\.?\s+rev\.?)|
|
|
81
|
-
FNA|DAO|HUH|FDNMB|
|
|
80
|
+
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
|
|
82
81
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
|
83
82
|
(?i:university|museum|exhibits?)|
|
|
84
83
|
(?i:uqam)|
|
|
@@ -165,6 +164,7 @@ module DwcAgent
|
|
|
165
164
|
BLACKLIST = %r{
|
|
166
165
|
(?i:abundant)|
|
|
167
166
|
(?i:adult|juvenile)|
|
|
167
|
+
(?i:administra(d|t)or)|
|
|
168
168
|
(?i:anon)|
|
|
169
169
|
(?i:australian?)|
|
|
170
170
|
(?i:average)|
|
|
@@ -173,9 +173,10 @@ module DwcAgent
|
|
|
173
173
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
|
174
174
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
|
175
175
|
(?i:carex|salix)|
|
|
176
|
-
(
|
|
176
|
+
(?i:catalog(ue)?)|
|
|
177
177
|
(?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
|
|
178
178
|
\b\s*(?i:help)\s*\b|
|
|
179
|
+
(?i:data\s+not\s+captured)|
|
|
179
180
|
(?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
|
|
180
181
|
(?i:desconocido)|
|
|
181
182
|
(?i:exc?s?icc?at(a|i))|
|
|
@@ -193,7 +194,9 @@ module DwcAgent
|
|
|
193
194
|
(?i:univ\.)|
|
|
194
195
|
(?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
|
195
196
|
(?i:non\s+pr(é|e)cis(é|e))|
|
|
196
|
-
(?i:
|
|
197
|
+
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
|
198
|
+
(?i:not?\s+(entered|stated))|
|
|
199
|
+
(?i:nomenclatur(e|al)\s+adjustment)|
|
|
197
200
|
(?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
|
|
198
201
|
(?i:recreation|culture)|
|
|
199
202
|
(?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
|
|
@@ -223,8 +226,8 @@ module DwcAgent
|
|
|
223
226
|
"van",
|
|
224
227
|
"von",
|
|
225
228
|
"the",
|
|
226
|
-
"
|
|
227
|
-
"
|
|
229
|
+
"of",
|
|
230
|
+
"curators"
|
|
228
231
|
]
|
|
229
232
|
|
|
230
233
|
TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwc_agent
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.4.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David P. Shorthouse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-09-
|
|
11
|
+
date: 2019-09-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: namae
|