dwc_agent 0.4.3 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/dwcagent +1 -1
- data/bin/dwcagent-similarity +1 -1
- data/lib/dwc_agent/cleaner.rb +6 -6
- data/lib/dwc_agent/constants.rb +5 -2
- data/lib/dwc_agent/parser.rb +1 -1
- data/lib/dwc_agent/version.rb +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ee1173b349dee8f3ef6fad429449acf1fb3816ae49972b0fb35e7eb9a37720
|
4
|
+
data.tar.gz: c4dc07d7b1e113835ca065f17cb86e394a313b5b1fc335c2bdbca7ec8bafbdb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5c450fcb300b84bf942643ed527658840c8c2981cc4d3766a0ff9c3d4f4f5e27c940342f7a344ca502029f0b51e61b1580a08b8c6a68a19db55279cfe52fba6
|
7
|
+
data.tar.gz: 02a17d7b41313802dc6e14efbb0f54604768a53c7d5637114204d058112748189809fc20b557b772cf1650045c4856ec6ba49bac162a28d3768e729cf872092c
|
data/bin/dwcagent
CHANGED
data/bin/dwcagent-similarity
CHANGED
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -16,16 +16,12 @@ module DwcAgent
|
|
16
16
|
# @param parsed_namae [Object] the namae object
|
17
17
|
# @return [Hash] the given, family hash
|
18
18
|
def clean(parsed_namae)
|
19
|
-
blank_name = { given: nil, family: nil }
|
19
|
+
blank_name = { given: nil, family: nil, particle: nil }
|
20
20
|
|
21
21
|
if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
|
22
22
|
return blank_name
|
23
23
|
end
|
24
24
|
|
25
|
-
if parsed_namae.family && parsed_namae.family.length < 2 && parsed_namae.family.count('.') == 0
|
26
|
-
return blank_name
|
27
|
-
end
|
28
|
-
|
29
25
|
if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
|
30
26
|
return blank_name
|
31
27
|
end
|
@@ -99,6 +95,10 @@ module DwcAgent
|
|
99
95
|
particle = nil
|
100
96
|
end
|
101
97
|
|
98
|
+
if !particle.nil? && particle.include?(".")
|
99
|
+
particle = nil
|
100
|
+
end
|
101
|
+
|
102
102
|
if !family.nil? && (family == family.upcase || family == family.downcase)
|
103
103
|
family = NameCase(family)
|
104
104
|
end
|
@@ -115,7 +115,7 @@ module DwcAgent
|
|
115
115
|
return blank_name
|
116
116
|
end
|
117
117
|
|
118
|
-
{ given: given, family: family }
|
118
|
+
{ given: given, family: family, particle: particle }
|
119
119
|
end
|
120
120
|
|
121
121
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -81,6 +81,7 @@ module DwcAgent
|
|
81
81
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
82
82
|
(?i:university|museum|exhibits?)|
|
83
83
|
(?i:uqam)|
|
84
|
+
(?i:sem\s+(colec?tor|data))|
|
84
85
|
\b[,;]\s+\d+\z|
|
85
86
|
["!@?]|
|
86
87
|
[,]?\d+|
|
@@ -158,7 +159,7 @@ module DwcAgent
|
|
158
159
|
}
|
159
160
|
|
160
161
|
COMPLEX_SEPARATORS = %r{
|
161
|
-
^(
|
162
|
+
^(\S{4,},\s+(?:\S\.\s*){1,})\s+(\S{4,},\s+(?:\S\.\s*){1,})$
|
162
163
|
}x
|
163
164
|
|
164
165
|
BLACKLIST = %r{
|
@@ -227,7 +228,9 @@ module DwcAgent
|
|
227
228
|
"von",
|
228
229
|
"the",
|
229
230
|
"of",
|
230
|
-
"curators"
|
231
|
+
"curators",
|
232
|
+
"nomenclatural",
|
233
|
+
"adjustment"
|
231
234
|
]
|
232
235
|
|
233
236
|
TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|docteur|mme|abbé|ptre)\.?|frère|frere|père|pere|professor|esq\.?)(\s+|$)/i
|
data/lib/dwc_agent/parser.rb
CHANGED
@@ -20,7 +20,7 @@ module DwcAgent
|
|
20
20
|
@char_subs_regex = Regexp.new [CHAR_SUBS.keys.join('\\')].to_s
|
21
21
|
@phrase_subs_regex = Regexp.new (PHRASE_SUBS.keys.join('|')).to_s
|
22
22
|
@complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
|
23
|
-
@add_separators_regex = Regexp.new %r{(
|
23
|
+
@add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
|
24
24
|
end
|
25
25
|
|
26
26
|
# Parses the passed-in string and returns a list of names.
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|