dwc_agent 3.0.12.0 → 3.0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +12 -10
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76fd38a17b4eacd1ccc83894969945f385e75c0be9c7e6a1235ab483c893ec1c
|
4
|
+
data.tar.gz: 4aee795631565aa42a692f699f3de412fc1148010ed9490390420287fcaf639d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79985675d5f45be65a7d98dc36d209736109cd01f3051afbea06dfc0b00bcdf19bc49ed41c4916aa2d13de6810194a1b56adccdbda6c8610ca3bb089a59e7aae
|
7
|
+
data.tar.gz: 81bc9a7c075cfb9274b0ab18e88c18df1f4eb8227a403f5341f8b5a57e80a9138f3c81c837ebcf2e841f28bacd5ba8d2430132c23d6019cbe387f65dc5e93176
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -54,7 +54,7 @@ module DwcAgent
|
|
54
54
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
55
55
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
56
56
|
\b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
|
57
|
-
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|
|
57
|
+
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian 
Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
58
58
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
59
59
|
\b\s*(?i:maybe)\s*\b|
|
60
60
|
\b\s*(?i:prob)\.\s*\b|
|
@@ -113,8 +113,8 @@ module DwcAgent
|
|
113
113
|
(?i:annot)\.?\s*?\b|
|
114
114
|
\s+(?i:stet)\s*!?\s*\z|
|
115
115
|
\s+(?i:prep)\.?\s*\z|
|
116
|
-
|
117
|
-
(\
|
116
|
+
\W([({\[].*?[)}\]])|
|
117
|
+
\W[\(\[\{][A-Za-z]{1,3}$|
|
118
118
|
\b(?i:leg)[\.:]?\s*\b|
|
119
119
|
(?:[Dd](ed|on))[\.:]|
|
120
120
|
\d*[A-Za-z]*\d*-\d*\z|
|
@@ -128,7 +128,6 @@ module DwcAgent
|
|
128
128
|
\:?\s*(?i:exch)(\b|\z)|
|
129
129
|
\s+de\s*$|
|
130
130
|
\.{2,}$|
|
131
|
-
\[|\]|
|
132
131
|
[^[:alnum:][:blank:][:punct:][∣´|ǀ∣|│`~$^+|<>]] # Removes emojis from string
|
133
132
|
}x
|
134
133
|
|
@@ -164,7 +163,8 @@ module DwcAgent
|
|
164
163
|
}x
|
165
164
|
|
166
165
|
POST_STRIP_TIDY = %r{
|
167
|
-
^\s*[&,;.]\s
|
166
|
+
^\s*[&,;.]\s*|
|
167
|
+
[\[\]]
|
168
168
|
}x
|
169
169
|
|
170
170
|
CHAR_SUBS = {
|
@@ -207,7 +207,8 @@ module DwcAgent
|
|
207
207
|
'-jr' => ' Jr.',
|
208
208
|
'-Jr' => ' Jr.',
|
209
209
|
'Dr.' => 'Dr. ',
|
210
|
-
'prof.' => 'Prof. '
|
210
|
+
'prof.' => 'Prof. ',
|
211
|
+
' .;' => '. ;'
|
211
212
|
}
|
212
213
|
|
213
214
|
SEPARATORS = {
|
@@ -217,7 +218,7 @@ module DwcAgent
|
|
217
218
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
218
219
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
219
220
|
"^([[:alpha:]]{2,})(?:\\s+)((?:\\S{1}\\.\\s?){1,})$" => "\\1, \\2",
|
220
|
-
"([[:alpha:]]*),?\\s*(.*)\\s+(van|von)$" => "\\3 \\1, \\2",
|
221
|
+
"([[:alpha:]]*),?\\s*(.*)\\s+(van|von|v\\.|v(a|o)n\\s+der?)$" => "\\3 \\1, \\2",
|
221
222
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})\\s+([[:alpha:]’`'-]{2,})$" => "\\1 \\4 | \\2 \\3 \\4",
|
222
223
|
"^([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\3 | \\2 \\3 | \\4",
|
223
224
|
"^([A-Z.\\s]+),\\s+([A-Z.\\s]+)\\s+(?:and|&|et|e)\\s+([A-Z.\\s]+)\\s+([[:alpha:]’`'-]{2,})(.*)$" => "\\1 \\4 | \\2 \\4 | \\3 \\4 | \\5",
|
@@ -281,7 +282,8 @@ module DwcAgent
|
|
281
282
|
(?i:sequence\s+data)|
|
282
283
|
(?i:size|large|colou?r)\s+|
|
283
284
|
(?i:skeleton)|
|
284
|
-
(?i:survey|assessment|station|monitor|stn\.|
|
285
|
+
(?i:survey|assessment|station|monitor|stn\.|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
|
286
|
+
^(?i:index)\s*\b|
|
285
287
|
(?i:submersible)|
|
286
288
|
(?i:synonymy?)|
|
287
289
|
(?i:systematic|perspective)|
|
@@ -310,7 +312,6 @@ module DwcAgent
|
|
310
312
|
"dos",
|
311
313
|
"du",
|
312
314
|
"el",
|
313
|
-
"le",
|
314
315
|
"la",
|
315
316
|
"van",
|
316
317
|
"von",
|
@@ -388,7 +389,8 @@ module DwcAgent
|
|
388
389
|
"the",
|
389
390
|
"of",
|
390
391
|
"van de",
|
391
|
-
"van der"
|
392
|
+
"van der",
|
393
|
+
"von der"
|
392
394
|
]
|
393
395
|
|
394
396
|
end
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.12.0
|
4
|
+
version: 3.0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09
|
11
|
+
date: 2023-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|