dwc_agent 1.5.0.5 → 1.5.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +4 -4
- data/lib/dwc_agent/constants.rb +26 -6
- data/lib/dwc_agent/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b3360ae9a26c61f08d3a687e8c4e1af9e19334714746a976cb4c90e93d72a63
|
4
|
+
data.tar.gz: 4ada728f04c124ec1fbd6a9b45c1ecba80f45319f051a087c2074b2417110478
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ced6e4cd9352b267ee8662993d4a39c9aa6e35965c550fef33993bd3c7c40feb07e7ad1ac8e6c60531f843b5e3d18a5bc0caafb38de41b55f0ad706be898d81a
|
7
|
+
data.tar.gz: 4c3c76a1f3cf17114d9e23a2ec19f40772fe38b5bd1b6dd49264cf82081dac797b182ae0a2cd5d90732a0f3cd947251ea28830f7769a82d4bfe8a10ca166213e
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -18,10 +18,6 @@ module DwcAgent
|
|
18
18
|
def clean(parsed_namae)
|
19
19
|
blank_name = { title: nil, appellation: nil, given: nil, particle: nil, family: nil, suffix: nil }
|
20
20
|
|
21
|
-
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
22
|
-
return blank_name
|
23
|
-
end
|
24
|
-
|
25
21
|
if parsed_namae.given && GIVEN_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
|
26
22
|
return blank_name
|
27
23
|
end
|
@@ -79,6 +75,10 @@ module DwcAgent
|
|
79
75
|
parsed_namae.given = NameCase(parsed_namae.given)
|
80
76
|
end
|
81
77
|
|
78
|
+
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
79
|
+
return blank_name
|
80
|
+
end
|
81
|
+
|
82
82
|
parsed_namae.normalize_initials
|
83
83
|
|
84
84
|
family = parsed_namae.family.gsub(/\.\z/, '').strip rescue nil
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -54,10 +54,11 @@ module DwcAgent
|
|
54
54
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
55
55
|
\b\s*(?i:maybe)\s*\b|
|
56
56
|
\b\s*(?i:prob)\.\s*\b|
|
57
|
-
\(?[,]?\s*?(?i:(local)?\s?
|
57
|
+
\(?[,]?\s*?(?i:(local)?\s?collectors?|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
|
58
58
|
\b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
|
59
59
|
(?i:Engº|Agrº|Fcº|Drº|Mº|Profº|Dº|Fº)|
|
60
60
|
(?i:fide)\:?\s*\b|
|
61
|
+
(?i:first\s+name\s+unknown)|
|
61
62
|
(?i:game\s+dept)\.?\s*\b|
|
62
63
|
(?i:see\s+notes?\s*(inside)?)|
|
63
64
|
(?i:see\s+letter\s+enclosed)|
|
@@ -67,16 +68,18 @@ module DwcAgent
|
|
67
68
|
\(?(?i:source)\(?|
|
68
69
|
(?i:according\s+to)|
|
69
70
|
(?i:lanuv)\d+|
|
71
|
+
\b\s*name\b|
|
72
|
+
\b\s*lost\b|
|
70
73
|
(?i:nswobs)|
|
71
74
|
ORCID|
|
72
75
|
MRI(\s|-)PAS|
|
73
76
|
urn\:qm\.qld\.gov\.au\:collector|
|
74
77
|
(?i:University\s+of\s+(Southern\s+)?California(,\s+Berkeley)?)|
|
75
|
-
(?i:
|
76
|
-
(?i:
|
78
|
+
(?i:field\s+museum\s+of\s+natural\s+history)|
|
79
|
+
(?i:american\s+museum\s+of\s+natural\s+history)|
|
77
80
|
(?i:The\s+Paleontological\s+Research\s+Institution)|
|
78
81
|
(?i:museums?\s+victoria)|
|
79
|
-
\b\s*(?i:
|
82
|
+
\b\s*(?i:united\s+states|russia)\s*\b|
|
80
83
|
(?i:revised|photograph|fruits\s+only)|
|
81
84
|
-?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
|
82
85
|
-?\s*(?i:synonym(y|ie))|
|
@@ -244,7 +247,7 @@ module DwcAgent
|
|
244
247
|
(?i:though)|
|
245
248
|
(?i:texas\s+instruments?)\s*?(for)?|
|
246
249
|
(?:tropical)|
|
247
|
-
(?i:toward|seen
|
250
|
+
(?i:toward|seen\s+at)|
|
248
251
|
(?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
|
249
252
|
(?i:urn\:)|
|
250
253
|
(?i:usda|ucla)|
|
@@ -253,30 +256,47 @@ module DwcAgent
|
|
253
256
|
}x
|
254
257
|
|
255
258
|
FAMILY_BLACKLIST = [
|
259
|
+
"ap",
|
256
260
|
"da",
|
261
|
+
"de",
|
257
262
|
"de'",
|
258
263
|
"del",
|
259
264
|
"der",
|
265
|
+
"di",
|
266
|
+
"do",
|
267
|
+
"dos",
|
260
268
|
"du",
|
261
269
|
"el",
|
270
|
+
"le",
|
271
|
+
"la",
|
262
272
|
"van",
|
263
273
|
"von",
|
264
274
|
"the",
|
265
275
|
"of",
|
266
276
|
"adjustment",
|
277
|
+
"annotator",
|
267
278
|
"available",
|
268
279
|
"arachnology",
|
269
280
|
"catalogue",
|
270
281
|
"curators",
|
271
282
|
"data",
|
283
|
+
"details",
|
284
|
+
"determiner",
|
272
285
|
"determination",
|
273
286
|
"dissection",
|
274
287
|
"entered",
|
288
|
+
"erased",
|
275
289
|
"indecipherable",
|
276
290
|
"nomenclatural",
|
277
291
|
"orig",
|
278
292
|
"registration",
|
279
|
-
"science"
|
293
|
+
"science",
|
294
|
+
"wg",
|
295
|
+
"wm",
|
296
|
+
"wn",
|
297
|
+
"zw",
|
298
|
+
"zz",
|
299
|
+
"z-"
|
280
300
|
]
|
281
301
|
|
282
302
|
GIVEN_BLACKLIST = [
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0.
|
4
|
+
version: 1.5.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|
@@ -102,7 +102,7 @@ homepage: https://github.com/bionomia/dwc_agent
|
|
102
102
|
licenses:
|
103
103
|
- MIT
|
104
104
|
metadata: {}
|
105
|
-
post_install_message:
|
105
|
+
post_install_message:
|
106
106
|
rdoc_options:
|
107
107
|
- "--encoding"
|
108
108
|
- UTF-8
|
@@ -119,8 +119,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
123
|
-
signing_key:
|
122
|
+
rubygems_version: 3.1.2
|
123
|
+
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Parse Darwin Core agent terms such as recordedBy and identifiedBy
|
126
126
|
test_files: []
|