dwc_agent 3.2.1.0 → 3.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +19 -11
- data/lib/dwc_agent/parser.rb +27 -21
- data/lib/dwc_agent/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40a0a33de602297e5e87e3059edbd0c88e0ad36bbb90f0803362698120f9e3b1
|
4
|
+
data.tar.gz: 94705d02d7cb7a3ac1647f903d15db7140ece849008ba84f6d49f1bf678abf14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1baeb5c1367e570139fca30a1181395030b12d09b1d4f12a026ad48e28ebaecee70b20d81b96b02f2600da269f2342281c8929facc70f0b9c571cafcfb273501
|
7
|
+
data.tar.gz: bf7676d6ed221258ab6efe217a7b695cb619c54be8a8d14f16bb23a3f1bc9e0fa0a33474211e7f20e5a08b058e2ab4b57e4c5e1367be74d9719ad2502a035d77
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -2,17 +2,25 @@ module DwcAgent
|
|
2
2
|
|
3
3
|
class Cleaner
|
4
4
|
|
5
|
+
@defaults = {
|
6
|
+
blacklist: BLACKLIST,
|
7
|
+
given_blacklist: GIVEN_BLACKLIST,
|
8
|
+
family_blacklist: FAMILY_BLACKLIST,
|
9
|
+
particles: PARTICLES
|
10
|
+
}
|
11
|
+
|
5
12
|
class << self
|
13
|
+
attr_reader :defaults
|
14
|
+
|
6
15
|
def instance
|
7
16
|
Thread.current[:dwc_agent_cleaner] ||= new
|
8
17
|
end
|
9
18
|
end
|
10
19
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@
|
15
|
-
@particles = PARTICLES
|
20
|
+
attr_reader :options
|
21
|
+
|
22
|
+
def initialize(options = {})
|
23
|
+
@options = self.class.defaults.merge(options)
|
16
24
|
end
|
17
25
|
|
18
26
|
def default
|
@@ -35,7 +43,7 @@ module DwcAgent
|
|
35
43
|
end
|
36
44
|
|
37
45
|
if parsed_namae.given &&
|
38
|
-
|
46
|
+
options[:given_blacklist].any?{ |s| s.casecmp(parsed_namae.given) == 0 }
|
39
47
|
return
|
40
48
|
end
|
41
49
|
|
@@ -55,7 +63,7 @@ module DwcAgent
|
|
55
63
|
return default
|
56
64
|
end
|
57
65
|
|
58
|
-
if parsed_namae.display_order =~
|
66
|
+
if parsed_namae.display_order =~ options[:blacklist]
|
59
67
|
return default
|
60
68
|
end
|
61
69
|
|
@@ -113,7 +121,7 @@ module DwcAgent
|
|
113
121
|
end
|
114
122
|
|
115
123
|
if parsed_namae.family &&
|
116
|
-
|
124
|
+
options[:family_blacklist].any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
117
125
|
return default
|
118
126
|
end
|
119
127
|
|
@@ -140,7 +148,7 @@ module DwcAgent
|
|
140
148
|
if !family.nil? &&
|
141
149
|
given.nil? &&
|
142
150
|
!particle.nil? &&
|
143
|
-
|
151
|
+
!options[:particles].include?(particle.downcase)
|
144
152
|
given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { $1.capitalize }
|
145
153
|
particle = nil
|
146
154
|
end
|
@@ -161,11 +169,11 @@ module DwcAgent
|
|
161
169
|
return default
|
162
170
|
end
|
163
171
|
|
164
|
-
if !family.nil? &&
|
172
|
+
if !family.nil? && options[:family_blacklist].any?{ |s| s.casecmp(family) == 0 }
|
165
173
|
return default
|
166
174
|
end
|
167
175
|
|
168
|
-
if !given.nil? &&
|
176
|
+
if !given.nil? && options[:given_blacklist].any?{ |s| s.casecmp(given) == 0 }
|
169
177
|
return default
|
170
178
|
end
|
171
179
|
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -2,27 +2,33 @@ module DwcAgent
|
|
2
2
|
|
3
3
|
class Parser
|
4
4
|
|
5
|
+
@defaults = {
|
6
|
+
prefer_comma_as_separator: true,
|
7
|
+
separator: SPLIT_BY,
|
8
|
+
title: TITLE,
|
9
|
+
appellation: APPELLATION,
|
10
|
+
suffix: SUFFIX,
|
11
|
+
strip_out_regex: Regexp.new(STRIP_OUT.to_s),
|
12
|
+
tidy_remains_regex: Regexp.new(POST_STRIP_TIDY.to_s),
|
13
|
+
char_subs_regex: Regexp.new([CHAR_SUBS.keys.join].to_s),
|
14
|
+
phrase_subs_regex: Regexp.new(PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s),
|
15
|
+
residual_terminators_regex: Regexp.new(SPLIT_BY.to_s + %r{\s*\z}.to_s),
|
16
|
+
separators: SEPARATORS.map{|k,v| [ Regexp.new(k), v] }
|
17
|
+
}
|
18
|
+
|
5
19
|
class << self
|
20
|
+
attr_reader :defaults
|
21
|
+
|
6
22
|
def instance
|
7
23
|
Thread.current[:dwc_agent_parser] ||= new
|
8
24
|
end
|
9
25
|
end
|
10
26
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
appellation: APPELLATION,
|
17
|
-
suffix: SUFFIX
|
18
|
-
}
|
19
|
-
@namae = Namae::Parser.new(options)
|
20
|
-
@strip_out_regex = Regexp.new STRIP_OUT.to_s
|
21
|
-
@tidy_remains_regex = Regexp.new POST_STRIP_TIDY.to_s
|
22
|
-
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
|
23
|
-
@phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
|
24
|
-
@residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
|
25
|
-
@separators = SEPARATORS.map{|k,v| [ Regexp.new(k), v] }
|
27
|
+
attr_reader :options, :namae
|
28
|
+
|
29
|
+
def initialize(options = {})
|
30
|
+
@options = self.class.defaults.merge(options)
|
31
|
+
@namae = Namae::Parser.new(@options)
|
26
32
|
end
|
27
33
|
|
28
34
|
# Parses the passed-in string and returns a list of names.
|
@@ -31,14 +37,14 @@ module DwcAgent
|
|
31
37
|
# @return [Array] the list of parsed names
|
32
38
|
def parse(name)
|
33
39
|
return [] if name.nil? || name == ""
|
34
|
-
name.gsub!(
|
35
|
-
name.gsub!(
|
36
|
-
name.gsub!(Regexp.union(
|
37
|
-
|
38
|
-
name.gsub!(
|
40
|
+
name.gsub!(options[:strip_out_regex], ' ')
|
41
|
+
name.gsub!(options[:tidy_remains_regex], '')
|
42
|
+
name.gsub!(Regexp.union(options[:char_subs_regex], options[:phrase_subs_regex]), CHAR_SUBS.merge(PHRASE_SUBS))
|
43
|
+
options[:separators].each{|k| name.gsub!(k[0], k[1])}
|
44
|
+
name.gsub!(options[:residual_terminators_regex], '')
|
39
45
|
name.squeeze!(' ')
|
40
46
|
name.strip!
|
41
|
-
|
47
|
+
namae.parse(name)
|
42
48
|
end
|
43
49
|
|
44
50
|
end
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|