dwc_agent 1.4.12 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +5 -2
- data/lib/dwc_agent/constants.rb +7 -7
- data/lib/dwc_agent/parser.rb +4 -2
- data/lib/dwc_agent/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87cfd6b6ab9ee83156f9503e48669691d486f143f64a623f93a40f064d2dc7f7
|
4
|
+
data.tar.gz: 7f40287aa6bf7e7b90408d72594fd9ab5498401c5ee402da6f2a78163f011a9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cb91ffc7530bd1a93fbbc70c716ad96d49cab139bf95507e3a688566ce997cb71510b13013bb07cb52dffe2ce7419d294392028eedd28f1caba8b032eb98c9a
|
7
|
+
data.tar.gz: ae226c63106a2b41899b6d4c85c92aa372d8d1cbce8a90023cf9b8d1a4c153a28bd33a9eb8064c492fa79d5d4229ec8979d5ca4f751d97fc1ec09132b0e46ca3
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -16,7 +16,7 @@ module DwcAgent
|
|
16
16
|
# @param parsed_namae [Object] the namae object
|
17
17
|
# @return [Hash] the given, family hash
|
18
18
|
def clean(parsed_namae)
|
19
|
-
blank_name = { given: nil, family: nil,
|
19
|
+
blank_name = { title: nil, appellation: nil, given: nil, particle: nil, family: nil, suffix: nil }
|
20
20
|
|
21
21
|
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
22
22
|
return blank_name
|
@@ -84,6 +84,9 @@ module DwcAgent
|
|
84
84
|
family = parsed_namae.family.gsub(/\.\z/, '').strip rescue nil
|
85
85
|
given = parsed_namae.given.strip rescue nil
|
86
86
|
particle = parsed_namae.particle.strip rescue nil
|
87
|
+
appellation = parsed_namae.appellation.strip rescue nil
|
88
|
+
suffix = parsed_names.suffix.strip rescue nil
|
89
|
+
title = parsed_names.title.strip rescue nil
|
87
90
|
|
88
91
|
if !given.nil? && given.match(/[A-Z]\.[A-Za-z]{2,}/)
|
89
92
|
given = given.gsub(".", ". ").strip
|
@@ -123,7 +126,7 @@ module DwcAgent
|
|
123
126
|
return blank_name
|
124
127
|
end
|
125
128
|
|
126
|
-
{ given: given, family: family,
|
129
|
+
{ title: nil, appellation: nil, given: given, particle: particle, family: family, suffix: nil }
|
127
130
|
end
|
128
131
|
|
129
132
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -159,16 +159,12 @@ module DwcAgent
|
|
159
159
|
}
|
160
160
|
|
161
161
|
PHRASE_SUBS = {
|
162
|
-
'dr\.' => 'Dr. ',
|
163
|
-
'mr\.' => 'Mr. ',
|
164
|
-
'mrs\.' => 'Mrs. ',
|
165
|
-
'ms\.' => 'Ms. ',
|
166
162
|
'prof\.' => 'Prof. ',
|
167
163
|
'\, ph\.d\.' => ' Ph.D.',
|
168
164
|
'\, bro\.' => ' Bro.',
|
169
|
-
' jr
|
170
|
-
' jr
|
171
|
-
'
|
165
|
+
' jr\.\,' => ' Jr.;',
|
166
|
+
' jr\,' => ' Jr.;',
|
167
|
+
'\-jr' => ' Jr.'
|
172
168
|
}
|
173
169
|
|
174
170
|
COMPLEX_SEPARATORS = %r{
|
@@ -274,4 +270,8 @@ module DwcAgent
|
|
274
270
|
|
275
271
|
TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
|
276
272
|
|
273
|
+
APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
274
|
+
|
275
|
+
SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/
|
276
|
+
|
277
277
|
end
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -11,14 +11,16 @@ module DwcAgent
|
|
11
11
|
options = {
|
12
12
|
prefer_comma_as_separator: true,
|
13
13
|
separator: SPLIT_BY,
|
14
|
-
title: TITLE
|
14
|
+
title: TITLE,
|
15
|
+
appellation: APPELLATION,
|
16
|
+
suffix: SUFFIX
|
15
17
|
}
|
16
18
|
@namae = Namae::Parser.new(options)
|
17
19
|
|
18
20
|
@strip_out_regex = Regexp.new STRIP_OUT.to_s
|
19
21
|
@residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
|
20
22
|
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
|
21
|
-
@phrase_subs_regex = Regexp.new
|
23
|
+
@phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.join('|').to_s, Regexp::IGNORECASE
|
22
24
|
@complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
|
23
25
|
@add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
|
24
26
|
end
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|