dwc_agent 3.2.0.0 → 3.3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 817c66f4671d6f4b7adf698f743e2bd9f3af0b8c0acdcaa011a0a7aaa4e26b81
4
- data.tar.gz: 64e55fb97847322db8b35d974c5f1643887bf0fdc186f6befeba464c5d600fd1
3
+ metadata.gz: 40a0a33de602297e5e87e3059edbd0c88e0ad36bbb90f0803362698120f9e3b1
4
+ data.tar.gz: 94705d02d7cb7a3ac1647f903d15db7140ece849008ba84f6d49f1bf678abf14
5
5
  SHA512:
6
- metadata.gz: aa52bf9395a8df4678ec1a5cbf3dc3e523daa366eebb89f2e63918d94fbbe784c648a5a0f7906a76b6c5778752a98a74bfe61b6831c5157fe7aa74dd885b116a
7
- data.tar.gz: f285c2eabd308dba87a03d7a92487d55e3b224f9dfbad1c37d9e43fff9ae2457dd16015bc47805e18c3a69c794e8a29b64ee6d8474321c94de3952bd96959937
6
+ metadata.gz: 1baeb5c1367e570139fca30a1181395030b12d09b1d4f12a026ad48e28ebaecee70b20d81b96b02f2600da269f2342281c8929facc70f0b9c571cafcfb273501
7
+ data.tar.gz: bf7676d6ed221258ab6efe217a7b695cb619c54be8a8d14f16bb23a3f1bc9e0fa0a33474211e7f20e5a08b058e2ab4b57e4c5e1367be74d9719ad2502a035d77
@@ -2,17 +2,25 @@ module DwcAgent
2
2
 
3
3
  class Cleaner
4
4
 
5
+ @defaults = {
6
+ blacklist: BLACKLIST,
7
+ given_blacklist: GIVEN_BLACKLIST,
8
+ family_blacklist: FAMILY_BLACKLIST,
9
+ particles: PARTICLES
10
+ }
11
+
5
12
  class << self
13
+ attr_reader :defaults
14
+
6
15
  def instance
7
16
  Thread.current[:dwc_agent_cleaner] ||= new
8
17
  end
9
18
  end
10
19
 
11
- def initialize
12
- @blacklist = BLACKLIST
13
- @given_blacklist = GIVEN_BLACKLIST
14
- @family_blacklist = FAMILY_BLACKLIST
15
- @particles = PARTICLES
20
+ attr_reader :options
21
+
22
+ def initialize(options = {})
23
+ @options = self.class.defaults.merge(options)
16
24
  end
17
25
 
18
26
  def default
@@ -26,6 +34,8 @@ module DwcAgent
26
34
  # @return Namae::Name [Object] a new Namae object
27
35
  def clean(parsed_namae)
28
36
 
37
+ return default if !parsed_namae.instance_of?(Namae::Name)
38
+
29
39
  if parsed_namae.family &&
30
40
  parsed_namae.family == NameCase(parsed_namae.family) &&
31
41
  parsed_namae.display_order.split.join == parsed_namae.initials
@@ -33,7 +43,7 @@ module DwcAgent
33
43
  end
34
44
 
35
45
  if parsed_namae.given &&
36
- @given_blacklist.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
46
+ options[:given_blacklist].any?{ |s| s.casecmp(parsed_namae.given) == 0 }
37
47
  return
38
48
  end
39
49
 
@@ -53,7 +63,7 @@ module DwcAgent
53
63
  return default
54
64
  end
55
65
 
56
- if parsed_namae.display_order =~ @blacklist
66
+ if parsed_namae.display_order =~ options[:blacklist]
57
67
  return default
58
68
  end
59
69
 
@@ -111,7 +121,7 @@ module DwcAgent
111
121
  end
112
122
 
113
123
  if parsed_namae.family &&
114
- @family_blacklist.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
124
+ options[:family_blacklist].any?{ |s| s.casecmp(parsed_namae.family) == 0 }
115
125
  return default
116
126
  end
117
127
 
@@ -138,7 +148,7 @@ module DwcAgent
138
148
  if !family.nil? &&
139
149
  given.nil? &&
140
150
  !particle.nil? &&
141
- !@particles.include?(particle.downcase)
151
+ !options[:particles].include?(particle.downcase)
142
152
  given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { $1.capitalize }
143
153
  particle = nil
144
154
  end
@@ -159,11 +169,11 @@ module DwcAgent
159
169
  return default
160
170
  end
161
171
 
162
- if !family.nil? && @family_blacklist.any?{ |s| s.casecmp(family) == 0 }
172
+ if !family.nil? && options[:family_blacklist].any?{ |s| s.casecmp(family) == 0 }
163
173
  return default
164
174
  end
165
175
 
166
- if !given.nil? && @given_blacklist.any?{ |s| s.casecmp(given) == 0 }
176
+ if !given.nil? && options[:given_blacklist].any?{ |s| s.casecmp(given) == 0 }
167
177
  return default
168
178
  end
169
179
 
@@ -10,7 +10,7 @@ module DwcAgent
10
10
  [,]?\s*\#*\s+\d+\-(?i:[A-Z]|\d)+\-?\d*[A-Za-z]*\z|
11
11
  \d*[A-Za-z]*\d*-\d*\z|
12
12
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
13
- \b[,;]?\s*(?:et\.?\s+al|&\s+al)\.?|
13
+ [,;\s]{1,}(?:et\.?\s+al|&\s+al)l?\.?|
14
14
  \b[,;]?\s*(?i:etal)\.?|
15
15
  \b[,;]?\s*(?i:et.al)\.?|
16
16
  \b\s+(bis|ter)(\b|\z)|
@@ -113,7 +113,7 @@ module DwcAgent
113
113
  (?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
114
114
  (?i:non?)\s+(?i:specificato)|
115
115
  \b[,;]\s+\d+\.?\z|
116
- [!@?]|
116
+ [!@?]\s*\-?\s*|
117
117
  \d{1,4}[\/.]?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x|xi|xii)[\/.]\d{1,4}|
118
118
  [,]?\d+|
119
119
  [,;]\z|
@@ -2,27 +2,33 @@ module DwcAgent
2
2
 
3
3
  class Parser
4
4
 
5
+ @defaults = {
6
+ prefer_comma_as_separator: true,
7
+ separator: SPLIT_BY,
8
+ title: TITLE,
9
+ appellation: APPELLATION,
10
+ suffix: SUFFIX,
11
+ strip_out_regex: Regexp.new(STRIP_OUT.to_s),
12
+ tidy_remains_regex: Regexp.new(POST_STRIP_TIDY.to_s),
13
+ char_subs_regex: Regexp.new([CHAR_SUBS.keys.join].to_s),
14
+ phrase_subs_regex: Regexp.new(PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s),
15
+ residual_terminators_regex: Regexp.new(SPLIT_BY.to_s + %r{\s*\z}.to_s),
16
+ separators: SEPARATORS.map{|k,v| [ Regexp.new(k), v] }
17
+ }
18
+
5
19
  class << self
20
+ attr_reader :defaults
21
+
6
22
  def instance
7
23
  Thread.current[:dwc_agent_parser] ||= new
8
24
  end
9
25
  end
10
26
 
11
- def initialize
12
- options = {
13
- prefer_comma_as_separator: true,
14
- separator: SPLIT_BY,
15
- title: TITLE,
16
- appellation: APPELLATION,
17
- suffix: SUFFIX
18
- }
19
- @namae = Namae::Parser.new(options)
20
- @strip_out_regex = Regexp.new STRIP_OUT.to_s
21
- @tidy_remains_regex = Regexp.new POST_STRIP_TIDY.to_s
22
- @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
23
- @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
24
- @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
25
- @separators = SEPARATORS.map{|k,v| [ Regexp.new(k), v] }
27
+ attr_reader :options, :namae
28
+
29
+ def initialize(options = {})
30
+ @options = self.class.defaults.merge(options)
31
+ @namae = Namae::Parser.new(@options)
26
32
  end
27
33
 
28
34
  # Parses the passed-in string and returns a list of names.
@@ -31,14 +37,14 @@ module DwcAgent
31
37
  # @return [Array] the list of parsed names
32
38
  def parse(name)
33
39
  return [] if name.nil? || name == ""
34
- name.gsub!(@strip_out_regex, ' ')
35
- name.gsub!(@tidy_remains_regex, '')
36
- name.gsub!(Regexp.union(@char_subs_regex, @phrase_subs_regex), CHAR_SUBS.merge(PHRASE_SUBS))
37
- @separators.each{|k| name.gsub!(k[0], k[1])}
38
- name.gsub!(@residual_terminators_regex, '')
40
+ name.gsub!(options[:strip_out_regex], ' ')
41
+ name.gsub!(options[:tidy_remains_regex], '')
42
+ name.gsub!(Regexp.union(options[:char_subs_regex], options[:phrase_subs_regex]), CHAR_SUBS.merge(PHRASE_SUBS))
43
+ options[:separators].each{|k| name.gsub!(k[0], k[1])}
44
+ name.gsub!(options[:residual_terminators_regex], '')
39
45
  name.squeeze!(' ')
40
46
  name.strip!
41
- @namae.parse(name)
47
+ namae.parse(name)
42
48
  end
43
49
 
44
50
  end
@@ -3,7 +3,7 @@ module DwcAgent
3
3
  class Version
4
4
 
5
5
  MAJOR = 3
6
- MINOR = 2
6
+ MINOR = 3
7
7
  PATCH = 0
8
8
  BUILD = 0
9
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0.0
4
+ version: 3.3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-21 00:00:00.000000000 Z
11
+ date: 2024-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae