dwc_agent 3.0.1.1 → 3.0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +24 -15
- data/lib/dwc_agent/constants.rb +2 -0
- data/lib/dwc_agent/parser.rb +1 -0
- data/lib/dwc_agent/similarity.rb +4 -2
- data/lib/dwc_agent/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45cbccd35856eb8b283ae4edcca8275b4fb1f901a9dc9420880abfb5eaea64ff
|
4
|
+
data.tar.gz: 27d4ec41bc275ff9a9a2846c9d7bf2745a8522da6656aae2268de77465deaa92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bda64950477617d6ecf2f5aa1c3de225a2dce33856c808ca836ed7e8a36b05d120fb7380352e1b3ca79ddcfc85442fbd0432853cf34283f31965cdd1e02924b
|
7
|
+
data.tar.gz: abf6a8173c42b55e4c51b71b030838112ec10914101b60ed6cf5c67a767194a761ac351a802229850f37ab8415f19580c381f7297caa9d27891ac85201deb773
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -9,6 +9,14 @@ module DwcAgent
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def initialize
|
12
|
+
@blacklist = BLACKLIST
|
13
|
+
@given_blacklist = GIVEN_BLACKLIST
|
14
|
+
@family_blacklist = FAMILY_BLACKLIST
|
15
|
+
@particles = PARTICLES
|
16
|
+
end
|
17
|
+
|
18
|
+
def default
|
19
|
+
Namae::Name.new
|
12
20
|
end
|
13
21
|
|
14
22
|
# Cleans the passed-in namae object from the parse method and
|
@@ -19,28 +27,28 @@ module DwcAgent
|
|
19
27
|
def clean(parsed_namae)
|
20
28
|
|
21
29
|
if parsed_namae.given &&
|
22
|
-
|
23
|
-
return
|
30
|
+
@given_blacklist.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
|
31
|
+
return
|
24
32
|
end
|
25
33
|
|
26
34
|
if parsed_namae.family &&
|
27
35
|
parsed_namae.family.length == 3 &&
|
28
36
|
parsed_namae.family.count('.') == 1
|
29
|
-
return
|
37
|
+
return default
|
30
38
|
end
|
31
39
|
|
32
40
|
if parsed_namae.given && parsed_namae.given.length > 35
|
33
|
-
return
|
41
|
+
return default
|
34
42
|
end
|
35
43
|
|
36
44
|
if parsed_namae.given &&
|
37
45
|
parsed_namae.given.count('.') >= 3 &&
|
38
46
|
/\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
|
39
|
-
return
|
47
|
+
return default
|
40
48
|
end
|
41
49
|
|
42
|
-
if parsed_namae.display_order =~
|
43
|
-
return
|
50
|
+
if parsed_namae.display_order =~ @blacklist
|
51
|
+
return default
|
44
52
|
end
|
45
53
|
|
46
54
|
if parsed_namae.family &&
|
@@ -97,8 +105,8 @@ module DwcAgent
|
|
97
105
|
end
|
98
106
|
|
99
107
|
if parsed_namae.family &&
|
100
|
-
|
101
|
-
return
|
108
|
+
@family_blacklist.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
109
|
+
return default
|
102
110
|
end
|
103
111
|
|
104
112
|
if parsed_namae.family.nil? &&
|
@@ -124,7 +132,7 @@ module DwcAgent
|
|
124
132
|
if !family.nil? &&
|
125
133
|
given.nil? &&
|
126
134
|
!particle.nil? &&
|
127
|
-
|
135
|
+
!@particles.include?(particle.downcase)
|
128
136
|
given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { $1.capitalize }
|
129
137
|
particle = nil
|
130
138
|
end
|
@@ -142,15 +150,15 @@ module DwcAgent
|
|
142
150
|
end
|
143
151
|
|
144
152
|
if given.nil? && !family.nil? && family.match(/^[A-Z]{2}/)
|
145
|
-
return
|
153
|
+
return default
|
146
154
|
end
|
147
155
|
|
148
|
-
if !family.nil? &&
|
149
|
-
return
|
156
|
+
if !family.nil? && @family_blacklist.any?{ |s| s.casecmp(family) == 0 }
|
157
|
+
return default
|
150
158
|
end
|
151
159
|
|
152
|
-
if !given.nil? &&
|
153
|
-
return
|
160
|
+
if !given.nil? && @given_blacklist.any?{ |s| s.casecmp(given) == 0 }
|
161
|
+
return default
|
154
162
|
end
|
155
163
|
|
156
164
|
name = {
|
@@ -165,4 +173,5 @@ module DwcAgent
|
|
165
173
|
end
|
166
174
|
|
167
175
|
end
|
176
|
+
|
168
177
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -222,6 +222,7 @@ module DwcAgent
|
|
222
222
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
223
223
|
(?i:carex|salix)|
|
224
224
|
(?i:catalog(ue)?)|
|
225
|
+
(?i:conservator)|
|
225
226
|
(?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
|
226
227
|
\b\s*(?i:help)\s*\b|
|
227
228
|
(?i:data\s+not\s+captured)|
|
@@ -297,6 +298,7 @@ module DwcAgent
|
|
297
298
|
"new",
|
298
299
|
"no",
|
299
300
|
"adjustment",
|
301
|
+
"agent",
|
300
302
|
"annotator",
|
301
303
|
"available",
|
302
304
|
"arachnology",
|
data/lib/dwc_agent/parser.rb
CHANGED
data/lib/dwc_agent/similarity.rb
CHANGED
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1.1
|
4
|
+
version: 3.0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|