dwc_agent 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '07829a173fc5801e1f9adb9a3c348e67ab761b23544ef16c9f6e205f440ec085'
4
- data.tar.gz: 7e974daf9385d4ccdc724cb03b0f69fcf96244b5bb2948518bbf3b4c1aafac5d
3
+ metadata.gz: 7bf17473ddabfee14ba9e7a8d451486ef8ebb058c9d1d0214b0ed9cbae996e48
4
+ data.tar.gz: cb404e507a9f9de35a5ddeebf94a6bdd4e0ad7b245e08909b1d71f50bd7595da
5
5
  SHA512:
6
- metadata.gz: cd9e27fcee8afa357d03d3a18926aa8d6d83c7c6500c1a1639976d304e22838a6b24db837f64619d2fbbb6d91ce282241d10fb5d53b27361d4286a3f3f9a810f
7
- data.tar.gz: 28ac3e485ccbb3ad13392168ebcd47b265926543c5a967a5ed770466c95b0868adf408d9519e6eebf88b846d70243c953dbd11ca1ffae02611af43bb90dedd1a
6
+ metadata.gz: a25e5863c6fa384604e399815ec1c4c9befadaf1ebdefb4ef52abdb1e9a296ace16718bab6020a32c3f1ed9c0728c46e76fdfa8c5b3133e98e48b0478c155858
7
+ data.tar.gz: 1243c70e9d479ca15dfc931e823939216873f536c3441f77e90bc9461e639e2d408e335d31418e0e31616686d24914ceffd43ae97fe9388f2ccfab443b102f1f
@@ -18,18 +18,26 @@ module DwcAgent
18
18
  def clean(parsed_namae)
19
19
  blank_name = { given: nil, family: nil }
20
20
 
21
+ if parsed_namae.family && FAMILY_BLACKLIST.include?(parsed_namae.family)
22
+ return blank_name
23
+ end
24
+
21
25
  if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
22
26
  return blank_name
23
27
  end
28
+
24
29
  if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
25
30
  return blank_name
26
31
  end
32
+
27
33
  if parsed_namae.given && parsed_namae.given.length > 25
28
34
  return blank_name
29
35
  end
36
+
30
37
  if parsed_namae.given && parsed_namae.given.count('.') >= 3 && /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
31
38
  return blank_name
32
39
  end
40
+
33
41
  if parsed_namae.display_order =~ BLACKLIST
34
42
  return blank_name
35
43
  end
@@ -44,6 +52,17 @@ module DwcAgent
44
52
  parsed_namae.given = family
45
53
  end
46
54
 
55
+ if parsed_namae.given &&
56
+ parsed_namae.family &&
57
+ parsed_namae.family.length <=3 &&
58
+ parsed_namae.family == parsed_namae.family.upcase &&
59
+ parsed_namae.given[-1] != "."
60
+ given = parsed_namae.given
61
+ family = parsed_namae.family
62
+ parsed_namae.family = given
63
+ parsed_namae.given = family
64
+ end
65
+
47
66
  if parsed_namae.given &&
48
67
  (parsed_namae.given == parsed_namae.given.upcase ||
49
68
  parsed_namae.given == parsed_namae.given.downcase) &&
@@ -1,5 +1,7 @@
1
1
  module DwcAgent
2
2
  STRIP_OUT = %r{
3
+ ^[\[{(]|
4
+ [\]})]$|
3
5
  \s*?\d+\.\d+|
4
6
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
5
7
  \b[,;]?\s*(?i:et\.?\s+al)\.?|
@@ -11,7 +13,7 @@ module DwcAgent
11
13
  \b[,;]?\s*(?i:unkn?own)\b|
12
14
  \b[,;]?\s*(?i:n/a)\b|
13
15
  \b[,;]?\s*(?i:ann?onymous)\b|
14
- \b[,;]?\s*(?i:undetermined|indeterminable|dummy|interim|accession)\b|
16
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|illegible|scripsit)\)?\b|
15
17
  \b[,;]?\s*(?i:importer|gift)\:?\b|
16
18
  \b[,;]?\s*(?i:frère|frere|père|pere|soeur|sister|bro)\.?(\b|\z)|
17
19
  \b[,;]?\s*(?i:string)\b|
@@ -81,7 +83,7 @@ module DwcAgent
81
83
  (?i:university|museum|exhibits?)|
82
84
  (?i:uqam)|
83
85
  \b[,;]\s+\d+\z|
84
- ["!]|
86
+ ["!@?]|
85
87
  [,]?\d+|
86
88
  \s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
87
89
  [,;]\z|
@@ -97,7 +99,8 @@ module DwcAgent
97
99
  ^[-,.\s;*\d]+\s?|
98
100
  -\d?\z|
99
101
  \s*?-{2,}\s*?|
100
- ^(?i:exc?p?)[:.]\s*
102
+ ^(?i:exc?p?)[:.]\s*|
103
+ \s+de\s*$
101
104
  }x
102
105
 
103
106
  SPLIT_BY = %r{
@@ -164,7 +167,7 @@ module DwcAgent
164
167
  (?i:anon)|
165
168
  (?i:australian?)|
166
169
  (?i:average)|
167
- (?i:believe|unclear|illegible|none|suggested|(dis)?agrees?)|approach|
170
+ (?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
168
171
  (?i:barcod)|
169
172
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
170
173
  (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
@@ -189,6 +192,7 @@ module DwcAgent
189
192
  (?i:univ\.)|
190
193
  (?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
191
194
  (?i:non\s+pr(é|e)cis(é|e))|
195
+ (?i:not?\s+stated)|
192
196
  (?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
193
197
  (?i:recreation|culture)|
194
198
  (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
@@ -209,9 +213,16 @@ module DwcAgent
209
213
  (?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
210
214
  (?i:urn\:)|
211
215
  (?i:usda|ucla)|
212
- (?i:workshop|garden|farm|jardin|public)
216
+ (?i:workshop|garden|farm|jardin|public)|
217
+ ^\s*?de\s*?$
213
218
  }x
214
219
 
215
- TITLE = /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
220
+ FAMILY_BLACKLIST = [
221
+ "der",
222
+ "van",
223
+ "von"
224
+ ]
225
+
226
+ TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
216
227
 
217
228
  end
@@ -2,8 +2,8 @@ module DwcAgent
2
2
  class Version
3
3
 
4
4
  MAJOR = 0
5
- MINOR = 3
6
- PATCH = 0
5
+ MINOR = 4
6
+ PATCH = 1
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-02 00:00:00.000000000 Z
11
+ date: 2019-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae