dwc_agent 3.0.8.0 → 3.0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +6 -2
- data/lib/dwc_agent/parser.rb +2 -0
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0d83fd4dba1ddb6b9976bb1dea4c95a5365cf31b9c1afc335e035f70317a40f
|
4
|
+
data.tar.gz: 2fc6de4a6de283d9cf8d813306bb034ca67e20e684ce6136d6f2366512633d34
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6ca8f0a7507c8a8d09b183f59ca47d97ff30a0de3e017da155e0b1e57ae5d34df4328ae9bdbb2fd4682b0d57afac7e251baebaa305fea5bc46b2886b22e5385
|
7
|
+
data.tar.gz: b9ad4777ade9052a3cd54926173c77973f899e76b3f65f43f31b45481de746e70daa4ecdcbb0b190567c27f3496d69cb5093d35522243b284ac5d4931fbe09a0
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -161,6 +161,10 @@ module DwcAgent
|
|
161
161
|
\b(?i:via|from)\s*\b
|
162
162
|
}x
|
163
163
|
|
164
|
+
POST_STRIP_TIDY = %r{
|
165
|
+
^\s*[&,;]\s*
|
166
|
+
}x
|
167
|
+
|
164
168
|
CHAR_SUBS = {
|
165
169
|
'"' => '\'',
|
166
170
|
'|' => ' | ',
|
@@ -210,7 +214,7 @@ module DwcAgent
|
|
210
214
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
211
215
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
212
216
|
"^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
|
213
|
-
"([[:alpha:]]*),?\\s
|
217
|
+
"([[:alpha:]]*),?\\s*(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
|
214
218
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})\\s+([[:alpha:]]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
|
215
219
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
|
216
220
|
"^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?(?i:and|&|et|e|,)\\s+([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
|
@@ -222,7 +226,7 @@ module DwcAgent
|
|
222
226
|
(?i:abundant)|
|
223
227
|
(?i:adult|juvenile)|
|
224
228
|
(?i:administra(d|t)or)|
|
225
|
-
(?i:anon)
|
229
|
+
^(?i:anon)$|
|
226
230
|
(?i:australian?)|
|
227
231
|
(?i:average)|
|
228
232
|
(?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -18,6 +18,7 @@ module DwcAgent
|
|
18
18
|
}
|
19
19
|
@namae = Namae::Parser.new(options)
|
20
20
|
@strip_out_regex = Regexp.new STRIP_OUT.to_s
|
21
|
+
@tidy_remains_regex = Regexp.new POST_STRIP_TIDY.to_s
|
21
22
|
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
|
22
23
|
@phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
|
23
24
|
@residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
|
@@ -31,6 +32,7 @@ module DwcAgent
|
|
31
32
|
def parse(name)
|
32
33
|
return [] if name.nil? || name == ""
|
33
34
|
name.gsub!(@strip_out_regex, ' ')
|
35
|
+
name.gsub!(@tidy_remains_regex, '')
|
34
36
|
name.gsub!(Regexp.union(@char_subs_regex, @phrase_subs_regex), CHAR_SUBS.merge(PHRASE_SUBS))
|
35
37
|
@separators.each{|k| name.gsub!(k[0], k[1])}
|
36
38
|
name.gsub!(@residual_terminators_regex, '')
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.8.0
|
4
|
+
version: 3.0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|