dwc_agent 3.0.9.0 → 3.0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +16 -9
- data/lib/dwc_agent/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84e34d440f575183168540f7849d523d7a5938dc5e96105c728b206d278e6a9a
|
4
|
+
data.tar.gz: c03d40835ea1fc03be60bd042ffcdf62d45ef1e5ad02076162fe73f03dbeca68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9949f67cb683d0df35fa6bd881beecf359de97178ecac6af45b58616f556627322f0f9a33d801e99a62ae459d1ff2cafc771e81818ccf37484df4c6c93de9b8
|
7
|
+
data.tar.gz: b3a4996ac405a16015d94cdfb0853b15153669f690c625eca6ade4de6c8b0591cab3d079403e62b3ee48c88f0c1cf3c6578fc085d760d6ce83bb9f3932e4dd4d
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -22,8 +22,9 @@ module DwcAgent
|
|
22
22
|
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
23
23
|
\b[,;]?\s*(?i:string)\b|
|
24
24
|
\b[,;]?\s*(?i:person\s*string)\b|
|
25
|
+
^(?i:colln?)\.?\s+|\s*(?i:colln?)\.?\s*$|
|
26
|
+
^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
|
25
27
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
26
|
-
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
27
28
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
28
29
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
29
30
|
May|Jun|Jul|Aug|Sept?|
|
@@ -53,7 +54,7 @@ module DwcAgent
|
|
53
54
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
54
55
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
55
56
|
\b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
|
56
|
-
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|
|
57
|
+
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian 
Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
57
58
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
58
59
|
\b\s*(?i:maybe)\s*\b|
|
59
60
|
\b\s*(?i:prob)\.\s*\b|
|
@@ -149,6 +150,7 @@ module DwcAgent
|
|
149
150
|
\b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
|
150
151
|
\b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
|
151
152
|
\b(?i:in?dentified(\s+by)?)\s*\b|
|
153
|
+
\b(?i:in\s+coll\.?\s*\b)|
|
152
154
|
\b(?i:in\s+part(\s+by)?)\s*\b|
|
153
155
|
\b(?i:och)\s*\b|
|
154
156
|
\b(?i:prep\.?\s+(?i:by)?)\s*\b|
|
@@ -162,7 +164,7 @@ module DwcAgent
|
|
162
164
|
}x
|
163
165
|
|
164
166
|
POST_STRIP_TIDY = %r{
|
165
|
-
^\s*[
|
167
|
+
^\s*[&,;.]\s*
|
166
168
|
}x
|
167
169
|
|
168
170
|
CHAR_SUBS = {
|
@@ -210,6 +212,7 @@ module DwcAgent
|
|
210
212
|
|
211
213
|
SEPARATORS = {
|
212
214
|
"^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
|
215
|
+
"^(Mrs?\\.?)\\s+&\\s+(Mrs?\\.?)\\s+(.*)$" => "\\1 \\3 | \\2 \\3",
|
213
216
|
"^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
|
214
217
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
215
218
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
@@ -229,7 +232,8 @@ module DwcAgent
|
|
229
232
|
^(?i:anon)$|
|
230
233
|
(?i:australian?)|
|
231
234
|
(?i:average)|
|
232
|
-
(?i:believe|unclear|ill?egible|
|
235
|
+
(?i:believe|unclear|ill?egible|suggested|(dis)?agrees?)|approach|
|
236
|
+
\b\s*(?i:none)\s*\b|
|
233
237
|
(?i:barcod)|
|
234
238
|
(?i:BgWd)|
|
235
239
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
@@ -257,7 +261,7 @@ module DwcAgent
|
|
257
261
|
(?i:geographic)|
|
258
262
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
259
263
|
(?i:univ\.)|
|
260
|
-
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|
|
264
|
+
\b\s*(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|personnel|staff|family|captain|friends|assistant|worker|gamekeeper)\s*\b|
|
261
265
|
(?i:non\s+pr(é|e)cis(é|e))|
|
262
266
|
(?i:no\s+consta)|
|
263
267
|
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
@@ -269,16 +273,18 @@ module DwcAgent
|
|
269
273
|
(?i:recreation|culture)|
|
270
274
|
(?i:renseigné)|
|
271
275
|
(?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
|
272
|
-
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|
|
273
|
-
(?i:
|
276
|
+
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|commission|consortium|council|club|exposit|alliance|protective|circle)|
|
277
|
+
^(?i:class)\s*\b|
|
278
|
+
(?i:commercial|control|product)|
|
279
|
+
^(?i:company)\s*\b|
|
274
280
|
(?i:sequence\s+data)|
|
275
281
|
(?i:size|large|colou?r)\s+|
|
276
282
|
(?i:skeleton)|
|
277
|
-
(?i:survey|assessment|station|monitor|stn\.|index|project|
|
283
|
+
(?i:survey|assessment|station|monitor|stn\.|index|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
|
278
284
|
(?i:submersible)|
|
279
285
|
(?i:synonymy?)|
|
280
286
|
(?i:systematic|perspective)|
|
281
|
-
^\s*(?i:off|too|the)\s
|
287
|
+
^\s*(?i:off|too|the)\s*\b|
|
282
288
|
(?i:taxiderm(ies|y))|
|
283
289
|
(?i:though)|
|
284
290
|
(?i:texas\s+instruments?)\s*?(for)?|
|
@@ -380,6 +386,7 @@ module DwcAgent
|
|
380
386
|
"von",
|
381
387
|
"the",
|
382
388
|
"of",
|
389
|
+
"van de",
|
383
390
|
"van der"
|
384
391
|
]
|
385
392
|
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.9.0
|
4
|
+
version: 3.0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|