dwc_agent 3.0.0.0 → 3.0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 969bc7536c6e62205c92cd816cccf491d4f307f81cafaa7c9fd4ba90d41a1bb1
4
- data.tar.gz: 21a049be78e691ab7905eae6af9723b9b2aeaca4e7d7594107f968494d71a932
3
+ metadata.gz: e92321c01b304886a0ed67c78c60241cefef5fbef3a57df6e39855747a555375
4
+ data.tar.gz: 926bc155c9063ef2928db59f14bd18471c14db5b75c75fd594a157a7f8e75d31
5
5
  SHA512:
6
- metadata.gz: cdb088fa5df59b1c058226b8266b0e8d3c686f6e3632ddf09867b0ac75c8e9cd292ab44025e2e97720161963e0a5f7f9d7daf718f55ed9d0a66f9d0a44c03d78
7
- data.tar.gz: 222e2443238ba507d97a7f069d08fbec5f070ec647f83d353ca482a0bbec926587e5ef65af21d7505f6abb714c709d2f3409efcc00d204d47f7ea723f05e3eb5
6
+ metadata.gz: bc4a450f5508b968999fe9bb6226b9565c10a9981335fa47dab31872369476e6273e7885df6681eb0bbf1b1667a1ab5e3db32b96c3ffeecb6c56e5a16f3701e1
7
+ data.tar.gz: 52b82ea6a804549f1ad8a14f71b004431883a122d0b2ba52f58abd41097780b81f2d423e4b64bb8d6f1dc6a46441e27e5be33577efe75b4199ef567bd248504b
data/bin/dwcagent CHANGED
@@ -8,7 +8,7 @@ names = []
8
8
  DwcAgent.parse(ARGV[0].dup).each do |r|
9
9
  name = DwcAgent.clean(r)
10
10
  if !name.family.nil? && name.family.length >= 2
11
- names << name
11
+ names << name.to_h
12
12
  end
13
13
  end
14
- puts names.uniq.to_h.to_json
14
+ puts names.uniq.to_json
@@ -14,8 +14,8 @@ module DwcAgent
14
14
  # Cleans the passed-in namae object from the parse method and
15
15
  # re-organizes it to better match expected Darwin Core output.
16
16
  #
17
- # @param parsed_namae [Object] the namae object
18
- # @return [Hash] the given, family hash
17
+ # @param parsed_namae [Namae::Name] a Namae object
18
+ # @return [Namae::Name] a new Namae object
19
19
  def clean(parsed_namae)
20
20
 
21
21
  if parsed_namae.given && GIVEN_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
@@ -52,7 +52,7 @@ module DwcAgent
52
52
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
53
53
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
54
54
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
55
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto 
Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
55
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto 
Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
56
56
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
57
57
  \b\s*(?i:maybe)\s*\b|
58
58
  \b\s*(?i:prob)\.\s*\b|
@@ -196,11 +196,11 @@ module DwcAgent
196
196
  SEPARATORS = {
197
197
  "^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
198
198
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
199
- "(\\w*),?\\s+(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
200
- "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+(\\w{2,})(.*)" => "\\1 \\3 | \\2 \\3 | \\4",
201
- "^([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,})\\s*?[,&]\\s*?([A-Z]\\w{2,})$" => "\\1 | \\2 | \\3",
202
- "^([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,})\\s*?[,&]\\s*?([A-Z]\\w{3,})$" => "\\1 | \\2 | \\3 | \\4",
203
- "^([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,}),\\s*?([A-Z]\\w{2,})\\s*?[,&]\\s*?([A-Z]\\w{3,})$" => "\\1 | \\2 | \\3 | \\4 | \\5"
199
+ "([[:alpha:]]*),?\\s+(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
200
+ "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]]{2,})(.*)" => "\\1 \\3 | \\2 \\3 | \\4",
201
+ "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?[,&]\\s*?([A-Z][[:alpha:]]{2,})$" => "\\1 | \\2 | \\3",
202
+ "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?[,&]\\s*?([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4",
203
+ "^([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,}),\\s*?([A-Z][[:alpha:]]{2,})\\s*?[,&]\\s*?([A-Z][[:alpha:]]{3,})$" => "\\1 | \\2 | \\3 | \\4 | \\5"
204
204
  }
205
205
 
206
206
  BLACKLIST = %r{
@@ -329,7 +329,7 @@ module DwcAgent
329
329
  "has not"
330
330
  ]
331
331
 
332
- TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
332
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|doct(eu|o)r|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
333
333
 
334
334
  APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
335
335
 
@@ -4,7 +4,7 @@ module DwcAgent
4
4
  MAJOR = 3
5
5
  MINOR = 0
6
6
  PATCH = 0
7
- BUILD = 0
7
+ BUILD = 4
8
8
 
9
9
  def self.version
10
10
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.0
4
+ version: 3.0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-30 00:00:00.000000000 Z
11
+ date: 2021-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae