dwc_agent 0.3.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 94bd1d5cf3ecff4b3b30f82e68bbe2477db29e0ba8ce7d6c5e3d5220bcf46ca9
4
- data.tar.gz: 2e0668f5aa4f76f9e05b83ba0d1514b0a9d89949904e8caff30cd4c450c8aa5b
3
+ metadata.gz: 69d6e0f6c0f9d59801d0e53a89dd73896f0ac5a8230156879227ea6efe14bb7d
4
+ data.tar.gz: 1330921589cbcad22273c4b5cae821eba7266cc4357756498ae3b6d4f029e3f8
5
5
  SHA512:
6
- metadata.gz: d5c1f56b49826c72715258299d915b586d0aea88165de1c26a68b68a006df2ae3232c9fbb4bbdc9382240b90674e84b27b414089fe0bd7331e089063734f0773
7
- data.tar.gz: c6c5c8aaba8d3b8b7305d430a154d14b4033f8a7f0d931161165d85baec0d93f0f81554299cdbac2f47734775a5189a9c3c5fc6ef50d801292da97c57fbea9cd
6
+ metadata.gz: 735057f853b259f4d7fcb9f4c73d2f193f9a0e363d439a8bf894bbd9f878e8667cd14979c10cf138c7cc007f83cd09b7d5f9f21e0482b6303f73a40c788a957a
7
+ data.tar.gz: 8760dc0d9976543fe6ce448e2129b94e2d86d5a17f8e0b4b297f4dd23e8c4e4d73b4c7af4863e5edfa6abbbdb83181e6364e8064b0c73f185b0370c3ea1f597c
@@ -16,20 +16,28 @@ module DwcAgent
16
16
  # @param parsed_namae [Object] the namae object
17
17
  # @return [Hash] the given, family hash
18
18
  def clean(parsed_namae)
19
- blank_name = { given: nil, family: nil }
19
+ blank_name = { given: nil, family: nil, particle: nil }
20
+
21
+ if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
22
+ return blank_name
23
+ end
20
24
 
21
25
  if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
22
26
  return blank_name
23
27
  end
28
+
24
29
  if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
25
30
  return blank_name
26
31
  end
32
+
27
33
  if parsed_namae.given && parsed_namae.given.length > 25
28
34
  return blank_name
29
35
  end
36
+
30
37
  if parsed_namae.given && parsed_namae.given.count('.') >= 3 && /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
31
38
  return blank_name
32
39
  end
40
+
33
41
  if parsed_namae.display_order =~ BLACKLIST
34
42
  return blank_name
35
43
  end
@@ -91,6 +99,10 @@ module DwcAgent
91
99
  particle = nil
92
100
  end
93
101
 
102
+ if !particle.nil? && particle.include?(".")
103
+ particle = nil
104
+ end
105
+
94
106
  if !family.nil? && (family == family.upcase || family == family.downcase)
95
107
  family = NameCase(family)
96
108
  end
@@ -103,7 +115,11 @@ module DwcAgent
103
115
  return blank_name
104
116
  end
105
117
 
106
- { given: given, family: family }
118
+ if !family.nil? && FAMILY_BLACKLIST.any?{ |s| s.casecmp(family) == 0 }
119
+ return blank_name
120
+ end
121
+
122
+ { given: given, family: family, particle: particle }
107
123
  end
108
124
 
109
125
  end
@@ -20,7 +20,6 @@ module DwcAgent
20
20
  \b[,;]?\s*(?i:person\s*string)\b|
21
21
  \b[,;]?\s*(?i:colls)\.(\b|\z)|
22
22
  \b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
23
- (?i:no\s+(data|disponible))|
24
23
  \b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
25
24
  [,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
26
25
  May|Jun|Jul|Aug|Sept?|
@@ -78,10 +77,11 @@ module DwcAgent
78
77
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
79
78
  \b(?i:to\s+(sub)?spp?)\.?|
80
79
  (?i:nom\.?\s+rev\.?)|
81
- FNA|DAO|HUH|FDNMB|
80
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|
82
81
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
83
82
  (?i:university|museum|exhibits?)|
84
83
  (?i:uqam)|
84
+ (?i:sem\s+(colec?tor|data))|
85
85
  \b[,;]\s+\d+\z|
86
86
  ["!@?]|
87
87
  [,]?\d+|
@@ -147,7 +147,8 @@ module DwcAgent
147
147
  '{' => '',
148
148
  '}' => '',
149
149
  '@' => '',
150
- '%' => ''
150
+ '%' => '',
151
+ '\\' => ''
151
152
  }
152
153
 
153
154
  PHRASE_SUBS = {
@@ -164,6 +165,7 @@ module DwcAgent
164
165
  BLACKLIST = %r{
165
166
  (?i:abundant)|
166
167
  (?i:adult|juvenile)|
168
+ (?i:administra(d|t)or)|
167
169
  (?i:anon)|
168
170
  (?i:australian?)|
169
171
  (?i:average)|
@@ -172,9 +174,10 @@ module DwcAgent
172
174
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
173
175
  (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
174
176
  (?i:carex|salix)|
175
- (?:catalog)|
177
+ (?i:catalog(ue)?)|
176
178
  (?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
177
179
  \b\s*(?i:help)\s*\b|
180
+ (?i:data\s+not\s+captured)|
178
181
  (?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
179
182
  (?i:desconocido)|
180
183
  (?i:exc?s?icc?at(a|i))|
@@ -192,6 +195,9 @@ module DwcAgent
192
195
  (?i:univ\.)|
193
196
  (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
194
197
  (?i:non\s+pr(é|e)cis(é|e))|
198
+ (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
199
+ (?i:not?\s+(entered|stated))|
200
+ (?i:nomenclatur(e|al)\s+adjustment)|
195
201
  (?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
196
202
  (?i:recreation|culture)|
197
203
  (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
@@ -216,6 +222,17 @@ module DwcAgent
216
222
  ^\s*?de\s*?$
217
223
  }x
218
224
 
225
+ FAMILY_BLACKLIST = [
226
+ "der",
227
+ "van",
228
+ "von",
229
+ "the",
230
+ "of",
231
+ "curators",
232
+ "nomenclatural",
233
+ "adjustment"
234
+ ]
235
+
219
236
  TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
220
237
 
221
238
  end
@@ -1,9 +1,9 @@
1
1
  module DwcAgent
2
2
  class Version
3
3
 
4
- MAJOR = 0
5
- MINOR = 3
6
- PATCH = 3
4
+ MAJOR = 1
5
+ MINOR = 0
6
+ PATCH = 0
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-24 00:00:00.000000000 Z
11
+ date: 2019-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae