dwc_agent 3.0.12.0 → 3.0.14.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e293fff35d22252ef51e375df3aa3daa48405ae175f45529ac90006d5edc48d3
4
- data.tar.gz: 54ebf850b428d4cf2bb6c0c17bc41c90aa0982db90f133dcdbc2077c6f1277d4
3
+ metadata.gz: 76fd38a17b4eacd1ccc83894969945f385e75c0be9c7e6a1235ab483c893ec1c
4
+ data.tar.gz: 4aee795631565aa42a692f699f3de412fc1148010ed9490390420287fcaf639d
5
5
  SHA512:
6
- metadata.gz: 9438247b18b2813c9643b8a272ce38b5b4083fac30791a66bc1df33920b987f586978e578d8bbfb0795991e13ed712bf1e1eaafb637df7e92a414dc10852762f
7
- data.tar.gz: 972b20669e401cc1c748f3e3135a49363c2964288ffce92cdc8131300f228ea7d75c11a82b4438d3167ab8901dbcd79e5156fe2c60a13d321dede70ff59d9315
6
+ metadata.gz: 79985675d5f45be65a7d98dc36d209736109cd01f3051afbea06dfc0b00bcdf19bc49ed41c4916aa2d13de6810194a1b56adccdbda6c8610ca3bb089a59e7aae
7
+ data.tar.gz: 81bc9a7c075cfb9274b0ab18e88c18df1f4eb8227a403f5341f8b5a57e80a9138f3c81c837ebcf2e841f28bacd5ba8d2430132c23d6019cbe387f65dc5e93176
@@ -54,7 +54,7 @@ module DwcAgent
54
54
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
55
55
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
56
56
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
57
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
57
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
58
58
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
59
59
  \b\s*(?i:maybe)\s*\b|
60
60
  \b\s*(?i:prob)\.\s*\b|
@@ -113,8 +113,8 @@ module DwcAgent
113
113
  (?i:annot)\.?\s*?\b|
114
114
  \s+(?i:stet)\s*!?\s*\z|
115
115
  \s+(?i:prep)\.?\s*\z|
116
- (\(|\{|\[).{1,}(\)|\]|\})|
117
- (\(|\[|\{).{1,}\z|
116
+ \W([({\[].*?[)}\]])|
117
+ \W[\(\[\{][A-Za-z]{1,3}$|
118
118
  \b(?i:leg)[\.:]?\s*\b|
119
119
  (?:[Dd](ed|on))[\.:]|
120
120
  \d*[A-Za-z]*\d*-\d*\z|
@@ -128,7 +128,6 @@ module DwcAgent
128
128
  \:?\s*(?i:exch)(\b|\z)|
129
129
  \s+de\s*$|
130
130
  \.{2,}$|
131
- \[|\]|
132
131
  [^[:alnum:][:blank:][:punct:][∣´|ǀ∣|│`~$^+|<>]] # Removes emojis from string
133
132
  }x
134
133
 
@@ -164,7 +163,8 @@ module DwcAgent
164
163
  }x
165
164
 
166
165
  POST_STRIP_TIDY = %r{
167
- ^\s*[&,;.]\s*
166
+ ^\s*[&,;.]\s*|
167
+ [\[\]]
168
168
  }x
169
169
 
170
170
  CHAR_SUBS = {
@@ -207,7 +207,8 @@ module DwcAgent
207
207
  '-jr' => ' Jr.',
208
208
  '-Jr' => ' Jr.',
209
209
  'Dr.' => 'Dr. ',
210
- 'prof.' => 'Prof. '
210
+ 'prof.' => 'Prof. ',
211
+ ' .;' => '. ;'
211
212
  }
212
213
 
213
214
  SEPARATORS = {
@@ -217,7 +218,7 @@ module DwcAgent
217
218
  "^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
218
219
  "(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
219
220
  "^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
220
- "([[:alpha:]]*),?\\s*(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
221
+ "([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2",
221
222
  "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})\\s+([[:alpha:]’`'-]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
222
223
  "^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
223
224
  "^([A-Z.\\s]+),\\s+([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\4 | \\2 \\4 | \\3 \\4 | \\5",
@@ -281,7 +282,8 @@ module DwcAgent
281
282
  (?i:sequence\s+data)|
282
283
  (?i:size|large|colou?r)\s+|
283
284
  (?i:skeleton)|
284
- (?i:survey|assessment|station|monitor|stn\.|index|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
285
+ (?i:survey|assessment|station|monitor|stn\.|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
286
+ ^(?i:index)\s*\b|
285
287
  (?i:submersible)|
286
288
  (?i:synonymy?)|
287
289
  (?i:systematic|perspective)|
@@ -310,7 +312,6 @@ module DwcAgent
310
312
  "dos",
311
313
  "du",
312
314
  "el",
313
- "le",
314
315
  "la",
315
316
  "van",
316
317
  "von",
@@ -388,7 +389,8 @@ module DwcAgent
388
389
  "the",
389
390
  "of",
390
391
  "van de",
391
- "van der"
392
+ "van der",
393
+ "von der"
392
394
  ]
393
395
 
394
396
  end
@@ -4,7 +4,7 @@ module DwcAgent
4
4
 
5
5
  MAJOR = 3
6
6
  MINOR = 0
7
- PATCH = 12
7
+ PATCH = 14
8
8
  BUILD = 0
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.12.0
4
+ version: 3.0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-18 00:00:00.000000000 Z
11
+ date: 2023-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae