dwc_agent 0.3.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c26dc44596aa62cfbf94db6aca41806ad6567367da35d23c746f19a036515121
4
- data.tar.gz: 1029da746bd324bec825363f69cb2d77d66fd211fa949337b7602e43193b2ae4
3
+ metadata.gz: f54434352d90dff97e7c733fbc2fb53117380d5f04d6841d9796f48a5603dc18
4
+ data.tar.gz: 1ca4ae5b5bf104b51fa030cfe3984e428ab4325df266a6c90de50dff134038c6
5
5
  SHA512:
6
- metadata.gz: a9ed1698d69ab48831a91d43c337e42efcfe6d1c97e00c8a7602465777302f93038c4d5b2a4eca244704cc59b7d49339f6d8489424e7b0ec0be0ba7e31eecab3
7
- data.tar.gz: 6ad5912ea9977238490ce73c8a05e99488b71688074d614424ac086998167f2a52118316e167646ab0a8f317a13d189d9969acd62dc903f9594138732e70c49a
6
+ metadata.gz: bd1a44b5926abba1a7124493980fcf8e91aebc3bf854bca8c92da973299a345f8eb79cd3a02696e93d1c4426a6fa3bd5ff3414239c884566ed5de0679b0e3760
7
+ data.tar.gz: 2dcea697b1c7d589227469e5a69a3da40f71886c01e29a714557276bcdc0d3a2755cc0ff89bf36dcd70388fd640917384b776ace8de4bd3352cdea217d08ae3e
@@ -18,18 +18,26 @@ module DwcAgent
18
18
  def clean(parsed_namae)
19
19
  blank_name = { given: nil, family: nil }
20
20
 
21
+ if parsed_namae.family && FAMILY_BLACKLIST.include?(parsed_namae.family)
22
+ return blank_name
23
+ end
24
+
21
25
  if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
22
26
  return blank_name
23
27
  end
28
+
24
29
  if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
25
30
  return blank_name
26
31
  end
32
+
27
33
  if parsed_namae.given && parsed_namae.given.length > 25
28
34
  return blank_name
29
35
  end
36
+
30
37
  if parsed_namae.given && parsed_namae.given.count('.') >= 3 && /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
31
38
  return blank_name
32
39
  end
40
+
33
41
  if parsed_namae.display_order =~ BLACKLIST
34
42
  return blank_name
35
43
  end
@@ -44,6 +52,17 @@ module DwcAgent
44
52
  parsed_namae.given = family
45
53
  end
46
54
 
55
+ if parsed_namae.given &&
56
+ parsed_namae.family &&
57
+ parsed_namae.family.length <=3 &&
58
+ parsed_namae.family == parsed_namae.family.upcase &&
59
+ parsed_namae.given[-1] != "."
60
+ given = parsed_namae.given
61
+ family = parsed_namae.family
62
+ parsed_namae.family = given
63
+ parsed_namae.given = family
64
+ end
65
+
47
66
  if parsed_namae.given &&
48
67
  (parsed_namae.given == parsed_namae.given.upcase ||
49
68
  parsed_namae.given == parsed_namae.given.downcase) &&
@@ -92,6 +111,10 @@ module DwcAgent
92
111
  return blank_name
93
112
  end
94
113
 
114
+ if !family.nil? && FAMILY_BLACKLIST.include?(family)
115
+ return blank_name
116
+ end
117
+
95
118
  { given: given, family: family }
96
119
  end
97
120
 
@@ -83,7 +83,7 @@ module DwcAgent
83
83
  (?i:university|museum|exhibits?)|
84
84
  (?i:uqam)|
85
85
  \b[,;]\s+\d+\z|
86
- ["!]|
86
+ ["!@?]|
87
87
  [,]?\d+|
88
88
  \s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
89
89
  [,;]\z|
@@ -99,7 +99,8 @@ module DwcAgent
99
99
  ^[-,.\s;*\d]+\s?|
100
100
  -\d?\z|
101
101
  \s*?-{2,}\s*?|
102
- ^(?i:exc?p?)[:.]\s*
102
+ ^(?i:exc?p?)[:.]\s*|
103
+ \s+de\s*$
103
104
  }x
104
105
 
105
106
  SPLIT_BY = %r{
@@ -146,7 +147,8 @@ module DwcAgent
146
147
  '{' => '',
147
148
  '}' => '',
148
149
  '@' => '',
149
- '%' => ''
150
+ '%' => '',
151
+ '\\' => ''
150
152
  }
151
153
 
152
154
  PHRASE_SUBS = {
@@ -166,7 +168,7 @@ module DwcAgent
166
168
  (?i:anon)|
167
169
  (?i:australian?)|
168
170
  (?i:average)|
169
- (?i:believe|unclear|illegible|none|suggested|(dis)?agrees?)|approach|
171
+ (?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
170
172
  (?i:barcod)|
171
173
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
172
174
  (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
@@ -191,6 +193,7 @@ module DwcAgent
191
193
  (?i:univ\.)|
192
194
  (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
193
195
  (?i:non\s+pr(é|e)cis(é|e))|
196
+ (?i:not?\s+stated)|
194
197
  (?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
195
198
  (?i:recreation|culture)|
196
199
  (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
@@ -211,9 +214,19 @@ module DwcAgent
211
214
  (?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
212
215
  (?i:urn\:)|
213
216
  (?i:usda|ucla)|
214
- (?i:workshop|garden|farm|jardin|public)
217
+ (?i:workshop|garden|farm|jardin|public)|
218
+ ^\s*?de\s*?$
215
219
  }x
216
220
 
217
- TITLE = /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
221
+ FAMILY_BLACKLIST = [
222
+ "der",
223
+ "van",
224
+ "von",
225
+ "the",
226
+ "The",
227
+ "Catalog"
228
+ ]
229
+
230
+ TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
218
231
 
219
232
  end
@@ -2,8 +2,8 @@ module DwcAgent
2
2
  class Version
3
3
 
4
4
  MAJOR = 0
5
- MINOR = 3
6
- PATCH = 1
5
+ MINOR = 4
6
+ PATCH = 2
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-02 00:00:00.000000000 Z
11
+ date: 2019-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae