dwc_agent 3.0.1.1 → 3.0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +24 -15
- data/lib/dwc_agent/constants.rb +2 -0
- data/lib/dwc_agent/parser.rb +1 -0
- data/lib/dwc_agent/similarity.rb +4 -2
- data/lib/dwc_agent/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45cbccd35856eb8b283ae4edcca8275b4fb1f901a9dc9420880abfb5eaea64ff
|
4
|
+
data.tar.gz: 27d4ec41bc275ff9a9a2846c9d7bf2745a8522da6656aae2268de77465deaa92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bda64950477617d6ecf2f5aa1c3de225a2dce33856c808ca836ed7e8a36b05d120fb7380352e1b3ca79ddcfc85442fbd0432853cf34283f31965cdd1e02924b
|
7
|
+
data.tar.gz: abf6a8173c42b55e4c51b71b030838112ec10914101b60ed6cf5c67a767194a761ac351a802229850f37ab8415f19580c381f7297caa9d27891ac85201deb773
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -9,6 +9,14 @@ module DwcAgent
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def initialize
|
12
|
+
@blacklist = BLACKLIST
|
13
|
+
@given_blacklist = GIVEN_BLACKLIST
|
14
|
+
@family_blacklist = FAMILY_BLACKLIST
|
15
|
+
@particles = PARTICLES
|
16
|
+
end
|
17
|
+
|
18
|
+
def default
|
19
|
+
Namae::Name.new
|
12
20
|
end
|
13
21
|
|
14
22
|
# Cleans the passed-in namae object from the parse method and
|
@@ -19,28 +27,28 @@ module DwcAgent
|
|
19
27
|
def clean(parsed_namae)
|
20
28
|
|
21
29
|
if parsed_namae.given &&
|
22
|
-
|
23
|
-
return
|
30
|
+
@given_blacklist.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
|
31
|
+
return
|
24
32
|
end
|
25
33
|
|
26
34
|
if parsed_namae.family &&
|
27
35
|
parsed_namae.family.length == 3 &&
|
28
36
|
parsed_namae.family.count('.') == 1
|
29
|
-
return
|
37
|
+
return default
|
30
38
|
end
|
31
39
|
|
32
40
|
if parsed_namae.given && parsed_namae.given.length > 35
|
33
|
-
return
|
41
|
+
return default
|
34
42
|
end
|
35
43
|
|
36
44
|
if parsed_namae.given &&
|
37
45
|
parsed_namae.given.count('.') >= 3 &&
|
38
46
|
/\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
|
39
|
-
return
|
47
|
+
return default
|
40
48
|
end
|
41
49
|
|
42
|
-
if parsed_namae.display_order =~
|
43
|
-
return
|
50
|
+
if parsed_namae.display_order =~ @blacklist
|
51
|
+
return default
|
44
52
|
end
|
45
53
|
|
46
54
|
if parsed_namae.family &&
|
@@ -97,8 +105,8 @@ module DwcAgent
|
|
97
105
|
end
|
98
106
|
|
99
107
|
if parsed_namae.family &&
|
100
|
-
|
101
|
-
return
|
108
|
+
@family_blacklist.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
109
|
+
return default
|
102
110
|
end
|
103
111
|
|
104
112
|
if parsed_namae.family.nil? &&
|
@@ -124,7 +132,7 @@ module DwcAgent
|
|
124
132
|
if !family.nil? &&
|
125
133
|
given.nil? &&
|
126
134
|
!particle.nil? &&
|
127
|
-
|
135
|
+
!@particles.include?(particle.downcase)
|
128
136
|
given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { $1.capitalize }
|
129
137
|
particle = nil
|
130
138
|
end
|
@@ -142,15 +150,15 @@ module DwcAgent
|
|
142
150
|
end
|
143
151
|
|
144
152
|
if given.nil? && !family.nil? && family.match(/^[A-Z]{2}/)
|
145
|
-
return
|
153
|
+
return default
|
146
154
|
end
|
147
155
|
|
148
|
-
if !family.nil? &&
|
149
|
-
return
|
156
|
+
if !family.nil? && @family_blacklist.any?{ |s| s.casecmp(family) == 0 }
|
157
|
+
return default
|
150
158
|
end
|
151
159
|
|
152
|
-
if !given.nil? &&
|
153
|
-
return
|
160
|
+
if !given.nil? && @given_blacklist.any?{ |s| s.casecmp(given) == 0 }
|
161
|
+
return default
|
154
162
|
end
|
155
163
|
|
156
164
|
name = {
|
@@ -165,4 +173,5 @@ module DwcAgent
|
|
165
173
|
end
|
166
174
|
|
167
175
|
end
|
176
|
+
|
168
177
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -222,6 +222,7 @@ module DwcAgent
|
|
222
222
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
223
223
|
(?i:carex|salix)|
|
224
224
|
(?i:catalog(ue)?)|
|
225
|
+
(?i:conservator)|
|
225
226
|
(?i:herbarium|herbier|collection|collected|publication|specimen|species|describe|an(a|o)morph|isolated|recorded|inspection|define|status|lighthouse)|
|
226
227
|
\b\s*(?i:help)\s*\b|
|
227
228
|
(?i:data\s+not\s+captured)|
|
@@ -297,6 +298,7 @@ module DwcAgent
|
|
297
298
|
"new",
|
298
299
|
"no",
|
299
300
|
"adjustment",
|
301
|
+
"agent",
|
300
302
|
"annotator",
|
301
303
|
"available",
|
302
304
|
"arachnology",
|
data/lib/dwc_agent/parser.rb
CHANGED
data/lib/dwc_agent/similarity.rb
CHANGED
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1.1
|
4
|
+
version: 3.0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|