dwc_agent 3.0.1.5 → 3.0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +11 -2
- data/lib/dwc_agent/constants.rb +7 -4
- data/lib/dwc_agent/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cf15dbefcecdfe7586bb2f71977477a82f7069befa6ab4ec0063db8f1417175
|
4
|
+
data.tar.gz: 5eb22a22b08b1d5925f163093460448cc6f2428ce7a70df8da175fe198ca7364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77c083f4b4672bf5381f43df6b0f600760b6ce5756929a9c0255d876ed170d72594be69437e78af43e7b0c671c33373b5b66822b6bc5874603f73b92134919b6
|
7
|
+
data.tar.gz: fd394686ecf2a68f3b98a31fd33c3a5abf42e4e453c1582af92d4fb913bfc7edc5cfc2ece996935697b64b3c86da7babaaafbc78566d7d255ce93bab3a511ac2
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -26,6 +26,12 @@ module DwcAgent
|
|
26
26
|
# @return Namae::Name [Object] a new Namae object
|
27
27
|
def clean(parsed_namae)
|
28
28
|
|
29
|
+
if parsed_namae.family &&
|
30
|
+
parsed_namae.family == NameCase(parsed_namae.family) &&
|
31
|
+
parsed_namae.display_order.split.join == parsed_namae.initials
|
32
|
+
return default
|
33
|
+
end
|
34
|
+
|
29
35
|
if parsed_namae.given &&
|
30
36
|
@given_blacklist.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
|
31
37
|
return
|
@@ -72,6 +78,7 @@ module DwcAgent
|
|
72
78
|
parsed_namae.family &&
|
73
79
|
parsed_namae.family.length <=3 &&
|
74
80
|
parsed_namae.family == parsed_namae.family.upcase &&
|
81
|
+
#parsed_namae.family != NameCase(parsed_namae.family) &&
|
75
82
|
parsed_namae.given[-1] != "."
|
76
83
|
given = parsed_namae.given
|
77
84
|
family = parsed_namae.family
|
@@ -137,7 +144,7 @@ module DwcAgent
|
|
137
144
|
particle = nil
|
138
145
|
end
|
139
146
|
|
140
|
-
if !particle.nil? && particle.include?(".")
|
147
|
+
if !particle.nil? && particle.include?(".") && !particle.include?("v")
|
141
148
|
particle = nil
|
142
149
|
end
|
143
150
|
|
@@ -167,7 +174,9 @@ module DwcAgent
|
|
167
174
|
given: given,
|
168
175
|
particle: particle,
|
169
176
|
family: family,
|
170
|
-
suffix: suffix
|
177
|
+
suffix: suffix,
|
178
|
+
nick: parsed_namae.nick,
|
179
|
+
dropping_particle: parsed_namae.dropping_particle
|
171
180
|
}
|
172
181
|
Namae::Name.new(name)
|
173
182
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -13,6 +13,7 @@ module DwcAgent
|
|
13
13
|
\b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
|
14
14
|
\b[,;]?\s*(?i:etc)\.?|
|
15
15
|
\b[,;]?\s*(?i:exp)\.?\s*(\b|\z)|
|
16
|
+
\b[,;]?\s*(?i:aboard)[^$]+|
|
16
17
|
\b[,;]?\s*(?i:on)\b|
|
17
18
|
\b[,;]?\s*(?i:unkn?own)\b|
|
18
19
|
\b[,;]?\s*(?i:n/a)\b|
|
@@ -52,7 +53,7 @@ module DwcAgent
|
|
52
53
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
53
54
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
54
55
|
\b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
|
55
|
-
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian 
Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
56
|
+
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian 
Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
56
57
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
57
58
|
\b\s*(?i:maybe)\s*\b|
|
58
59
|
\b\s*(?i:prob)\.\s*\b|
|
@@ -102,7 +103,7 @@ module DwcAgent
|
|
102
103
|
(?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
|
103
104
|
(?i:non?)\s+(?i:specificato)|
|
104
105
|
\b[,;]\s+\d+\z|
|
105
|
-
[
|
106
|
+
[!@?]|
|
106
107
|
[,]?\d+|
|
107
108
|
\s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
|
108
109
|
[,;]\z|
|
@@ -121,7 +122,8 @@ module DwcAgent
|
|
121
122
|
^[-,.\s;*\d]+\s?|
|
122
123
|
\s*?-{2,}\s*?|
|
123
124
|
^(?i:exc?p?)[:.]\s*|
|
124
|
-
(
|
125
|
+
^(?:ex\.?|in)\s+(?:he?r?b)\.?\s+|
|
126
|
+
(?!^)(?:ex\.?|in)\s+(?:he?r?b)\.?\s+.*$|
|
125
127
|
\:?\s*(?i:exch)(\b|\z)|
|
126
128
|
\s+de\s*$|
|
127
129
|
\.{2,}$|
|
@@ -132,7 +134,7 @@ module DwcAgent
|
|
132
134
|
[–|ǀ∣|│&+\/;:]|
|
133
135
|
\s+-\s+|
|
134
136
|
\s+a\.\s+|
|
135
|
-
\b(con|e|y|i|en|et|or|per|for)\s*\b|
|
137
|
+
\b(con|e|y|i|en|et|or|per|for|und)\s*\b|
|
136
138
|
\b(?i:and|with)\s*\b|
|
137
139
|
\b(?i:annotated(\s+by)?)\s*\b|
|
138
140
|
\b(?i:coll\.)\s*\b|
|
@@ -159,6 +161,7 @@ module DwcAgent
|
|
159
161
|
}x
|
160
162
|
|
161
163
|
CHAR_SUBS = {
|
164
|
+
'"' => '\'',
|
162
165
|
'|' => ' | ',
|
163
166
|
'ǀ' => ' | ',
|
164
167
|
'∣' => ' | ',
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1.5
|
4
|
+
version: 3.0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.3.
|
122
|
+
rubygems_version: 3.3.7
|
123
123
|
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Parse Darwin Core agent terms such as recordedBy and identifiedBy
|