dwc_agent 3.0.9.0 → 3.0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0d83fd4dba1ddb6b9976bb1dea4c95a5365cf31b9c1afc335e035f70317a40f
4
- data.tar.gz: 2fc6de4a6de283d9cf8d813306bb034ca67e20e684ce6136d6f2366512633d34
3
+ metadata.gz: 84e34d440f575183168540f7849d523d7a5938dc5e96105c728b206d278e6a9a
4
+ data.tar.gz: c03d40835ea1fc03be60bd042ffcdf62d45ef1e5ad02076162fe73f03dbeca68
5
5
  SHA512:
6
- metadata.gz: d6ca8f0a7507c8a8d09b183f59ca47d97ff30a0de3e017da155e0b1e57ae5d34df4328ae9bdbb2fd4682b0d57afac7e251baebaa305fea5bc46b2886b22e5385
7
- data.tar.gz: b9ad4777ade9052a3cd54926173c77973f899e76b3f65f43f31b45481de746e70daa4ecdcbb0b190567c27f3496d69cb5093d35522243b284ac5d4931fbe09a0
6
+ metadata.gz: c9949f67cb683d0df35fa6bd881beecf359de97178ecac6af45b58616f556627322f0f9a33d801e99a62ae459d1ff2cafc771e81818ccf37484df4c6c93de9b8
7
+ data.tar.gz: b3a4996ac405a16015d94cdfb0853b15153669f690c625eca6ade4de6c8b0591cab3d079403e62b3ee48c88f0c1cf3c6578fc085d760d6ce83bb9f3932e4dd4d
@@ -22,8 +22,9 @@ module DwcAgent
22
22
  \b[,;]?\s*(?i:importer|gift)\:?\b|
23
23
  \b[,;]?\s*(?i:string)\b|
24
24
  \b[,;]?\s*(?i:person\s*string)\b|
25
+ ^(?i:colln?)\.?\s+|\s*(?i:colln?)\.?\s*$|
26
+ ^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
25
27
  \b[,;]?\s*(?i:colls)\.(\b|\z)|
26
- \b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
27
28
  \b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
28
29
  [,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
29
30
  May|Jun|Jul|Aug|Sept?|
@@ -53,7 +54,7 @@ module DwcAgent
53
54
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
54
55
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
55
56
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
56
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
57
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
57
58
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
58
59
  \b\s*(?i:maybe)\s*\b|
59
60
  \b\s*(?i:prob)\.\s*\b|
@@ -149,6 +150,7 @@ module DwcAgent
149
150
  \b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
150
151
  \b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
151
152
  \b(?i:in?dentified(\s+by)?)\s*\b|
153
+ \b(?i:in\s+coll\.?\s*\b)|
152
154
  \b(?i:in\s+part(\s+by)?)\s*\b|
153
155
  \b(?i:och)\s*\b|
154
156
  \b(?i:prep\.?\s+(?i:by)?)\s*\b|
@@ -162,7 +164,7 @@ module DwcAgent
162
164
  }x
163
165
 
164
166
  POST_STRIP_TIDY = %r{
165
- ^\s*[&,;]\s*
167
+ ^\s*[&,;.]\s*
166
168
  }x
167
169
 
168
170
  CHAR_SUBS = {
@@ -210,6 +212,7 @@ module DwcAgent
210
212
 
211
213
  SEPARATORS = {
212
214
  "^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
215
+ "^(Mrs?\\.?)\\s+&\\s+(Mrs?\\.?)\\s+(.*)$" => "\\1 \\3 | \\2 \\3",
213
216
  "^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
214
217
  "^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
215
218
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
@@ -229,7 +232,8 @@ module DwcAgent
229
232
  ^(?i:anon)$|
230
233
  (?i:australian?)|
231
234
  (?i:average)|
232
- (?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
235
+ (?i:believe|unclear|ill?egible|suggested|(dis)?agrees?)|approach|
236
+ \b\s*(?i:none)\s*\b|
233
237
  (?i:barcod)|
234
238
  (?i:BgWd)|
235
239
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
@@ -257,7 +261,7 @@ module DwcAgent
257
261
  (?i:geographic)|
258
262
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
259
263
  (?i:univ\.)|
260
- (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
264
+ \b\s*(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|personnel|staff|family|captain|friends|assistant|worker|gamekeeper)\s*\b|
261
265
  (?i:non\s+pr(é|e)cis(é|e))|
262
266
  (?i:no\s+consta)|
263
267
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
@@ -269,16 +273,18 @@ module DwcAgent
269
273
  (?i:recreation|culture)|
270
274
  (?i:renseigné)|
271
275
  (?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
272
- (?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|class|commission|consortium|council|club|exposit|alliance|protective|circle)|
273
- (?i:commercial|company|control|product)|
276
+ (?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|commission|consortium|council|club|exposit|alliance|protective|circle)|
277
+ ^(?i:class)\s*\b|
278
+ (?i:commercial|control|product)|
279
+ ^(?i:company)\s*\b|
274
280
  (?i:sequence\s+data)|
275
281
  (?i:size|large|colou?r)\s+|
276
282
  (?i:skeleton)|
277
- (?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
283
+ (?i:survey|assessment|station|monitor|stn\.|index|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
278
284
  (?i:submersible)|
279
285
  (?i:synonymy?)|
280
286
  (?i:systematic|perspective)|
281
- ^\s*(?i:off|too|the)\s*|
287
+ ^\s*(?i:off|too|the)\s*\b|
282
288
  (?i:taxiderm(ies|y))|
283
289
  (?i:though)|
284
290
  (?i:texas\s+instruments?)\s*?(for)?|
@@ -380,6 +386,7 @@ module DwcAgent
380
386
  "von",
381
387
  "the",
382
388
  "of",
389
+ "van de",
383
390
  "van der"
384
391
  ]
385
392
 
@@ -4,7 +4,7 @@ module DwcAgent
4
4
 
5
5
  MAJOR = 3
6
6
  MINOR = 0
7
- PATCH = 9
7
+ PATCH = 11
8
8
  BUILD = 0
9
9
 
10
10
  def self.version
@@ -13,4 +13,4 @@ module DwcAgent
13
13
 
14
14
  end
15
15
 
16
- end
16
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.9.0
4
+ version: 3.0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-11 00:00:00.000000000 Z
11
+ date: 2023-09-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae