dwc_agent 3.0.8.0 → 3.0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +6 -2
- data/lib/dwc_agent/parser.rb +2 -0
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c0d83fd4dba1ddb6b9976bb1dea4c95a5365cf31b9c1afc335e035f70317a40f
|
|
4
|
+
data.tar.gz: 2fc6de4a6de283d9cf8d813306bb034ca67e20e684ce6136d6f2366512633d34
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d6ca8f0a7507c8a8d09b183f59ca47d97ff30a0de3e017da155e0b1e57ae5d34df4328ae9bdbb2fd4682b0d57afac7e251baebaa305fea5bc46b2886b22e5385
|
|
7
|
+
data.tar.gz: b9ad4777ade9052a3cd54926173c77973f899e76b3f65f43f31b45481de746e70daa4ecdcbb0b190567c27f3496d69cb5093d35522243b284ac5d4931fbe09a0
|
data/lib/dwc_agent/constants.rb
CHANGED
|
@@ -161,6 +161,10 @@ module DwcAgent
|
|
|
161
161
|
\b(?i:via|from)\s*\b
|
|
162
162
|
}x
|
|
163
163
|
|
|
164
|
+
POST_STRIP_TIDY = %r{
|
|
165
|
+
^\s*[&,;]\s*
|
|
166
|
+
}x
|
|
167
|
+
|
|
164
168
|
CHAR_SUBS = {
|
|
165
169
|
'"' => '\'',
|
|
166
170
|
'|' => ' | ',
|
|
@@ -210,7 +214,7 @@ module DwcAgent
|
|
|
210
214
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
|
211
215
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
|
212
216
|
"^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
|
|
213
|
-
"([[:alpha:]]*),?\\s
|
|
217
|
+
"([[:alpha:]]*),?\\s*(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
|
|
214
218
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})\\s+([[:alpha:]]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
|
|
215
219
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
|
|
216
220
|
"^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
|
|
@@ -222,7 +226,7 @@ module DwcAgent
|
|
|
222
226
|
(?i:abundant)|
|
|
223
227
|
(?i:adult|juvenile)|
|
|
224
228
|
(?i:administra(d|t)or)|
|
|
225
|
-
(?i:anon)
|
|
229
|
+
^(?i:anon)$|
|
|
226
230
|
(?i:australian?)|
|
|
227
231
|
(?i:average)|
|
|
228
232
|
(?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
|
data/lib/dwc_agent/parser.rb
CHANGED
|
@@ -18,6 +18,7 @@ module DwcAgent
|
|
|
18
18
|
}
|
|
19
19
|
@namae = Namae::Parser.new(options)
|
|
20
20
|
@strip_out_regex = Regexp.new STRIP_OUT.to_s
|
|
21
|
+
@tidy_remains_regex = Regexp.new POST_STRIP_TIDY.to_s
|
|
21
22
|
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
|
|
22
23
|
@phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
|
|
23
24
|
@residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
|
|
@@ -31,6 +32,7 @@ module DwcAgent
|
|
|
31
32
|
def parse(name)
|
|
32
33
|
return [] if name.nil? || name == ""
|
|
33
34
|
name.gsub!(@strip_out_regex, ' ')
|
|
35
|
+
name.gsub!(@tidy_remains_regex, '')
|
|
34
36
|
name.gsub!(Regexp.union(@char_subs_regex, @phrase_subs_regex), CHAR_SUBS.merge(PHRASE_SUBS))
|
|
35
37
|
@separators.each{|k| name.gsub!(k[0], k[1])}
|
|
36
38
|
name.gsub!(@residual_terminators_regex, '')
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwc_agent
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.8.0
|
|
4
|
+
version: 3.0.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David P. Shorthouse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-09-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: namae
|