dwc_agent 1.4.12 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +5 -2
- data/lib/dwc_agent/constants.rb +7 -7
- data/lib/dwc_agent/parser.rb +4 -2
- data/lib/dwc_agent/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87cfd6b6ab9ee83156f9503e48669691d486f143f64a623f93a40f064d2dc7f7
|
4
|
+
data.tar.gz: 7f40287aa6bf7e7b90408d72594fd9ab5498401c5ee402da6f2a78163f011a9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cb91ffc7530bd1a93fbbc70c716ad96d49cab139bf95507e3a688566ce997cb71510b13013bb07cb52dffe2ce7419d294392028eedd28f1caba8b032eb98c9a
|
7
|
+
data.tar.gz: ae226c63106a2b41899b6d4c85c92aa372d8d1cbce8a90023cf9b8d1a4c153a28bd33a9eb8064c492fa79d5d4229ec8979d5ca4f751d97fc1ec09132b0e46ca3
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -16,7 +16,7 @@ module DwcAgent
|
|
16
16
|
# @param parsed_namae [Object] the namae object
|
17
17
|
# @return [Hash] the given, family hash
|
18
18
|
def clean(parsed_namae)
|
19
|
-
blank_name = { given: nil, family: nil,
|
19
|
+
blank_name = { title: nil, appellation: nil, given: nil, particle: nil, family: nil, suffix: nil }
|
20
20
|
|
21
21
|
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
22
22
|
return blank_name
|
@@ -84,6 +84,9 @@ module DwcAgent
|
|
84
84
|
family = parsed_namae.family.gsub(/\.\z/, '').strip rescue nil
|
85
85
|
given = parsed_namae.given.strip rescue nil
|
86
86
|
particle = parsed_namae.particle.strip rescue nil
|
87
|
+
appellation = parsed_namae.appellation.strip rescue nil
|
88
|
+
suffix = parsed_names.suffix.strip rescue nil
|
89
|
+
title = parsed_names.title.strip rescue nil
|
87
90
|
|
88
91
|
if !given.nil? && given.match(/[A-Z]\.[A-Za-z]{2,}/)
|
89
92
|
given = given.gsub(".", ". ").strip
|
@@ -123,7 +126,7 @@ module DwcAgent
|
|
123
126
|
return blank_name
|
124
127
|
end
|
125
128
|
|
126
|
-
{ given: given, family: family,
|
129
|
+
{ title: nil, appellation: nil, given: given, particle: particle, family: family, suffix: nil }
|
127
130
|
end
|
128
131
|
|
129
132
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -159,16 +159,12 @@ module DwcAgent
|
|
159
159
|
}
|
160
160
|
|
161
161
|
PHRASE_SUBS = {
|
162
|
-
'dr\.' => 'Dr. ',
|
163
|
-
'mr\.' => 'Mr. ',
|
164
|
-
'mrs\.' => 'Mrs. ',
|
165
|
-
'ms\.' => 'Ms. ',
|
166
162
|
'prof\.' => 'Prof. ',
|
167
163
|
'\, ph\.d\.' => ' Ph.D.',
|
168
164
|
'\, bro\.' => ' Bro.',
|
169
|
-
' jr
|
170
|
-
' jr
|
171
|
-
'
|
165
|
+
' jr\.\,' => ' Jr.;',
|
166
|
+
' jr\,' => ' Jr.;',
|
167
|
+
'\-jr' => ' Jr.'
|
172
168
|
}
|
173
169
|
|
174
170
|
COMPLEX_SEPARATORS = %r{
|
@@ -274,4 +270,8 @@ module DwcAgent
|
|
274
270
|
|
275
271
|
TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
|
276
272
|
|
273
|
+
APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
274
|
+
|
275
|
+
SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/
|
276
|
+
|
277
277
|
end
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -11,14 +11,16 @@ module DwcAgent
|
|
11
11
|
options = {
|
12
12
|
prefer_comma_as_separator: true,
|
13
13
|
separator: SPLIT_BY,
|
14
|
-
title: TITLE
|
14
|
+
title: TITLE,
|
15
|
+
appellation: APPELLATION,
|
16
|
+
suffix: SUFFIX
|
15
17
|
}
|
16
18
|
@namae = Namae::Parser.new(options)
|
17
19
|
|
18
20
|
@strip_out_regex = Regexp.new STRIP_OUT.to_s
|
19
21
|
@residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
|
20
22
|
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
|
21
|
-
@phrase_subs_regex = Regexp.new
|
23
|
+
@phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.join('|').to_s, Regexp::IGNORECASE
|
22
24
|
@complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
|
23
25
|
@add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
|
24
26
|
end
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.12
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|