dwc_agent 3.0.0.6 → 3.0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e33855504e8b60e240c2ccf1efb192dcbeb9878fb5d2f0172772ec2c2d3e9fa
4
- data.tar.gz: 260e2c9304731ad852b0587a22938b31d48aeb1bd7d922eb30755e795ec054fe
3
+ metadata.gz: ed891ad93906581d9340f3baa16006336cd9424d8c44bf7fecd0611db5546182
4
+ data.tar.gz: a6785a960284e60f948fa07dfb042b83adb656ee5b2473101478aba43bbabf8b
5
5
  SHA512:
6
- metadata.gz: bb32cd909dd7d824bc7e064316ce3e10ee6cc6b14a12371cc8464f94f2b5d0b2f0b605aeef924e44423f1af8c2af62c001f67511e2dd438086f097b81da29b57
7
- data.tar.gz: cf52be5fde1f4c7ebee816fd07ae3903c3f7c55a6a41542fd21cb265a868590a3391c0d17a514c2b40ac7ca7dbfd37d9cb53211aec4a0eb0c1ec9793aa4b7c6d
6
+ metadata.gz: 2765dfcc031e7cfc5e6c9d6b39f1aae9285fda95b029bd5a5d192292cafa911e885ce490a1e811055715b10bde1e986055fa36b13775c008ce04a6bda14baffb
7
+ data.tar.gz: 69807bf8ff357feec4ab9cd0a0e4c765cd49df19fa1b96eba6b8dfc7ade0c5c568d192dba83d325eec79e4a5f20b9752db8e5b5d5e560fcc3aa461062d5debe8
@@ -18,11 +18,14 @@ module DwcAgent
18
18
  # @return Namae::Name [Object] a new Namae object
19
19
  def clean(parsed_namae)
20
20
 
21
- if parsed_namae.given && GIVEN_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
21
+ if parsed_namae.given &&
22
+ GIVEN_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
22
23
  return Namae::Name.new
23
24
  end
24
25
 
25
- if parsed_namae.family && parsed_namae.family.length == 3 && parsed_namae.family.count('.') == 1
26
+ if parsed_namae.family &&
27
+ parsed_namae.family.length == 3 &&
28
+ parsed_namae.family.count('.') == 1
26
29
  return Namae::Name.new
27
30
  end
28
31
 
@@ -30,7 +33,9 @@ module DwcAgent
30
33
  return Namae::Name.new
31
34
  end
32
35
 
33
- if parsed_namae.given && parsed_namae.given.count('.') >= 3 && /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
36
+ if parsed_namae.given &&
37
+ parsed_namae.given.count('.') >= 3 &&
38
+ /\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./.match(parsed_namae.given)
34
39
  return Namae::Name.new
35
40
  end
36
41
 
@@ -38,6 +43,13 @@ module DwcAgent
38
43
  return Namae::Name.new
39
44
  end
40
45
 
46
+ if parsed_namae.family &&
47
+ parsed_namae.family.count(".") == 1 &&
48
+ parsed_namae.family[-1] == "." &&
49
+ parsed_namae.family.length > 3
50
+ parsed_namae.family = parsed_namae.family.delete_suffix(".")
51
+ end
52
+
41
53
  if parsed_namae.given &&
42
54
  parsed_namae.family &&
43
55
  parsed_namae.family.count(".") > 0 &&
@@ -59,6 +71,15 @@ module DwcAgent
59
71
  parsed_namae.given = family
60
72
  end
61
73
 
74
+ if !parsed_namae.given &&
75
+ parsed_namae.particle &&
76
+ parsed_namae.family &&
77
+ /^[A-Za-z]{3,}\s+(?:[A-Z]\.\s?){1,}$/.match(parsed_namae.family)
78
+ matched = /^(?<family>[A-Za-z]{3,})\s+(?<given>([A-Z]\.\s?){1,})$/.match(parsed_namae.family)
79
+ parsed_namae.family = matched[:family]
80
+ parsed_namae.given = matched[:given]
81
+ end
82
+
62
83
  if parsed_namae.given &&
63
84
  (parsed_namae.given == parsed_namae.given.upcase ||
64
85
  parsed_namae.given == parsed_namae.given.downcase) &&
@@ -75,13 +96,21 @@ module DwcAgent
75
96
  parsed_namae.given = NameCase(parsed_namae.given)
76
97
  end
77
98
 
78
- if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
99
+ if parsed_namae.family &&
100
+ FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
79
101
  return Namae::Name.new
80
102
  end
81
103
 
104
+ if parsed_namae.family.nil? &&
105
+ !parsed_namae.given.nil? &&
106
+ !parsed_namae.given.include?(".")
107
+ parsed_namae.family = parsed_namae.given
108
+ parsed_namae.given = nil
109
+ end
110
+
82
111
  parsed_namae.normalize_initials
83
112
 
84
- family = parsed_namae.family.gsub(/\.\z/, '').strip rescue nil
113
+ family = parsed_namae.family
85
114
  given = parsed_namae.given.strip rescue nil
86
115
  particle = parsed_namae.particle.strip rescue nil
87
116
  appellation = parsed_namae.appellation.strip rescue nil
@@ -92,12 +121,10 @@ module DwcAgent
92
121
  given = given.gsub(".", ". ").strip
93
122
  end
94
123
 
95
- if family.nil? && !given.nil? && !given.include?(".")
96
- family = given
97
- given = nil
98
- end
99
-
100
- if !family.nil? && given.nil? && !particle.nil?
124
+ if !family.nil? &&
125
+ given.nil? &&
126
+ !particle.nil? &&
127
+ !PARTICLES.include?(particle.downcase)
101
128
  given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { $1.capitalize }
102
129
  particle = nil
103
130
  end
@@ -110,8 +137,8 @@ module DwcAgent
110
137
  family = NameCase(family)
111
138
  end
112
139
 
113
- if !family.nil? && family.match(/[A-Z]$/)
114
- return Namae::Name.new
140
+ if !family.nil? && family.match(/[A-Z]{1,3}$/)
141
+ family = NameCase(family.upcase)
115
142
  end
116
143
 
117
144
  if given.nil? && !family.nil? && family.match(/^[A-Z]{2}/)
@@ -52,12 +52,12 @@ module DwcAgent
52
52
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
53
53
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
54
54
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
55
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
55
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
56
56
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
57
57
  \b\s*(?i:maybe)\s*\b|
58
58
  \b\s*(?i:prob)\.\s*\b|
59
59
  \b\s*(?i:field\s*number)|
60
- \b\s*?(?i:malaise|light|pitfall|pan|suction|lobster|actinic light|cdc)\s*(?i:trap)\s*\b|
60
+ \b\s*?(?i:malaise|light|pitfall|pan|suction|lobster|actinic light|cdc|fisherm(a|e)n)\s*(?i:trap)\s*\b|
61
61
  \|\s*(?i:collector\s*(field\s*)?number).*$|
62
62
  \(?[,]?\s*?(?i:(local)?\s?collectors?|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
63
63
  \b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
@@ -240,7 +240,7 @@ module DwcAgent
240
240
  (?i:geographic)|
241
241
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
242
242
  (?i:univ\.)|
243
- (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
243
+ (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
244
244
  (?i:non\s+pr(é|e)cis(é|e))|
245
245
  (?i:no\s+consta)|
246
246
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
@@ -261,8 +261,7 @@ module DwcAgent
261
261
  (?i:submersible)|
262
262
  (?i:synonymy?)|
263
263
  (?i:systematic|perspective)|
264
- \s+(?i:off)\s+|
265
- \s*(?i:too)\s+|\s*(?i:the)\s+|
264
+ ^\s*(?i:off|too|the)\s*|
266
265
  (?i:taxiderm(ies|y))|
267
266
  (?i:though)|
268
267
  (?i:texas\s+instruments?)\s*?(for)?|
@@ -342,4 +341,26 @@ module DwcAgent
342
341
 
343
342
  SUFFIX = /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/
344
343
 
344
+ PARTICLES = [
345
+ "ap",
346
+ "da",
347
+ "de",
348
+ "de'",
349
+ "del",
350
+ "der",
351
+ "des",
352
+ "di",
353
+ "do",
354
+ "dos",
355
+ "du",
356
+ "el",
357
+ "le",
358
+ "la",
359
+ "van",
360
+ "von",
361
+ "the",
362
+ "of",
363
+ "van der"
364
+ ]
365
+
345
366
  end
@@ -4,7 +4,7 @@ module DwcAgent
4
4
  MAJOR = 3
5
5
  MINOR = 0
6
6
  PATCH = 0
7
- BUILD = 6
7
+ BUILD = 9
8
8
 
9
9
  def self.version
10
10
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.6
4
+ version: 3.0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-11 00:00:00.000000000 Z
11
+ date: 2022-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae