dwc_agent 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +11 -0
- data/lib/dwc_agent/constants.rb +6 -4
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfbc3ad3469ce14a1496befc2ffae9c25a17f2526dc02f1e8e7c14c7e7b431f0
|
4
|
+
data.tar.gz: ddeb0f4eeb81450c1f8ec0d2233e037340650570a14f77b2fa08b5d0e65ee9ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8add8048e7c1ef15d9974d198cb9e74c8dd1ec210530fbe4088b4dedd993de4e94cf01ed5daf58e21bb353eaf6d8192f377fb0ba93bb01371112bdf491453de
|
7
|
+
data.tar.gz: efca1ecff92dee36596d0ae655678e892b2729c47333a6af25c0e68e0ad525b4e69ecd573524498eeb5ca4e87b96f5e6e4133a5ce2200f43e5b01def0c0ca19b
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -44,6 +44,17 @@ module DwcAgent
|
|
44
44
|
parsed_namae.given = family
|
45
45
|
end
|
46
46
|
|
47
|
+
if parsed_namae.given &&
|
48
|
+
parsed_namae.family &&
|
49
|
+
parsed_namae.family.length <=3 &&
|
50
|
+
parsed_namae.family == parsed_namae.family.upcase &&
|
51
|
+
parsed_namae.given[-1] != "."
|
52
|
+
given = parsed_namae.given
|
53
|
+
family = parsed_namae.family
|
54
|
+
parsed_namae.family = given
|
55
|
+
parsed_namae.given = family
|
56
|
+
end
|
57
|
+
|
47
58
|
if parsed_namae.given &&
|
48
59
|
(parsed_namae.given == parsed_namae.given.upcase ||
|
49
60
|
parsed_namae.given == parsed_namae.given.downcase) &&
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -83,7 +83,7 @@ module DwcAgent
|
|
83
83
|
(?i:university|museum|exhibits?)|
|
84
84
|
(?i:uqam)|
|
85
85
|
\b[,;]\s+\d+\z|
|
86
|
-
["
|
86
|
+
["!@?]|
|
87
87
|
[,]?\d+|
|
88
88
|
\s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
|
89
89
|
[,;]\z|
|
@@ -99,7 +99,8 @@ module DwcAgent
|
|
99
99
|
^[-,.\s;*\d]+\s?|
|
100
100
|
-\d?\z|
|
101
101
|
\s*?-{2,}\s*?|
|
102
|
-
^(?i:exc?p?)[:.]\s
|
102
|
+
^(?i:exc?p?)[:.]\s*|
|
103
|
+
\s+de\s*$
|
103
104
|
}x
|
104
105
|
|
105
106
|
SPLIT_BY = %r{
|
@@ -166,7 +167,7 @@ module DwcAgent
|
|
166
167
|
(?i:anon)|
|
167
168
|
(?i:australian?)|
|
168
169
|
(?i:average)|
|
169
|
-
(?i:believe|unclear|
|
170
|
+
(?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
|
170
171
|
(?i:barcod)|
|
171
172
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
172
173
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
@@ -211,7 +212,8 @@ module DwcAgent
|
|
211
212
|
(?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
|
212
213
|
(?i:urn\:)|
|
213
214
|
(?i:usda|ucla)|
|
214
|
-
(?i:workshop|garden|farm|jardin|public)
|
215
|
+
(?i:workshop|garden|farm|jardin|public)|
|
216
|
+
^\s*?de\s*?$
|
215
217
|
}x
|
216
218
|
|
217
219
|
TITLE = /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|