dwc_agent 3.0.9.0 → 3.0.11.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +16 -9
- data/lib/dwc_agent/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 84e34d440f575183168540f7849d523d7a5938dc5e96105c728b206d278e6a9a
|
|
4
|
+
data.tar.gz: c03d40835ea1fc03be60bd042ffcdf62d45ef1e5ad02076162fe73f03dbeca68
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c9949f67cb683d0df35fa6bd881beecf359de97178ecac6af45b58616f556627322f0f9a33d801e99a62ae459d1ff2cafc771e81818ccf37484df4c6c93de9b8
|
|
7
|
+
data.tar.gz: b3a4996ac405a16015d94cdfb0853b15153669f690c625eca6ade4de6c8b0591cab3d079403e62b3ee48c88f0c1cf3c6578fc085d760d6ce83bb9f3932e4dd4d
|
data/lib/dwc_agent/constants.rb
CHANGED
|
@@ -22,8 +22,9 @@ module DwcAgent
|
|
|
22
22
|
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
|
23
23
|
\b[,;]?\s*(?i:string)\b|
|
|
24
24
|
\b[,;]?\s*(?i:person\s*string)\b|
|
|
25
|
+
^(?i:colln?)\.?\s+|\s*(?i:colln?)\.?\s*$|
|
|
26
|
+
^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
|
|
25
27
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
|
26
|
-
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
|
27
28
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
|
28
29
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
|
29
30
|
May|Jun|Jul|Aug|Sept?|
|
|
@@ -53,7 +54,7 @@ module DwcAgent
|
|
|
53
54
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
|
54
55
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
|
55
56
|
\b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
|
|
56
|
-
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|
|
|
57
|
+
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian 
Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
|
57
58
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
|
58
59
|
\b\s*(?i:maybe)\s*\b|
|
|
59
60
|
\b\s*(?i:prob)\.\s*\b|
|
|
@@ -149,6 +150,7 @@ module DwcAgent
|
|
|
149
150
|
\b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
|
|
150
151
|
\b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
|
|
151
152
|
\b(?i:in?dentified(\s+by)?)\s*\b|
|
|
153
|
+
\b(?i:in\s+coll\.?\s*\b)|
|
|
152
154
|
\b(?i:in\s+part(\s+by)?)\s*\b|
|
|
153
155
|
\b(?i:och)\s*\b|
|
|
154
156
|
\b(?i:prep\.?\s+(?i:by)?)\s*\b|
|
|
@@ -162,7 +164,7 @@ module DwcAgent
|
|
|
162
164
|
}x
|
|
163
165
|
|
|
164
166
|
POST_STRIP_TIDY = %r{
|
|
165
|
-
^\s*[
|
|
167
|
+
^\s*[&,;.]\s*
|
|
166
168
|
}x
|
|
167
169
|
|
|
168
170
|
CHAR_SUBS = {
|
|
@@ -210,6 +212,7 @@ module DwcAgent
|
|
|
210
212
|
|
|
211
213
|
SEPARATORS = {
|
|
212
214
|
"^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
|
|
215
|
+
"^(Mrs?\\.?)\\s+&\\s+(Mrs?\\.?)\\s+(.*)$" => "\\1 \\3 | \\2 \\3",
|
|
213
216
|
"^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
|
|
214
217
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
|
215
218
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
|
@@ -229,7 +232,8 @@ module DwcAgent
|
|
|
229
232
|
^(?i:anon)$|
|
|
230
233
|
(?i:australian?)|
|
|
231
234
|
(?i:average)|
|
|
232
|
-
(?i:believe|unclear|ill?egible|
|
|
235
|
+
(?i:believe|unclear|ill?egible|suggested|(dis)?agrees?)|approach|
|
|
236
|
+
\b\s*(?i:none)\s*\b|
|
|
233
237
|
(?i:barcod)|
|
|
234
238
|
(?i:BgWd)|
|
|
235
239
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
|
@@ -257,7 +261,7 @@ module DwcAgent
|
|
|
257
261
|
(?i:geographic)|
|
|
258
262
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
|
259
263
|
(?i:univ\.)|
|
|
260
|
-
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|
|
|
264
|
+
\b\s*(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fisherm(a|e)n|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|personnel|staff|family|captain|friends|assistant|worker|gamekeeper)\s*\b|
|
|
261
265
|
(?i:non\s+pr(é|e)cis(é|e))|
|
|
262
266
|
(?i:no\s+consta)|
|
|
263
267
|
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
|
@@ -269,16 +273,18 @@ module DwcAgent
|
|
|
269
273
|
(?i:recreation|culture)|
|
|
270
274
|
(?i:renseigné)|
|
|
271
275
|
(?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
|
|
272
|
-
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|
|
|
273
|
-
(?i:
|
|
276
|
+
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|commission|consortium|council|club|exposit|alliance|protective|circle)|
|
|
277
|
+
^(?i:class)\s*\b|
|
|
278
|
+
(?i:commercial|control|product)|
|
|
279
|
+
^(?i:company)\s*\b|
|
|
274
280
|
(?i:sequence\s+data)|
|
|
275
281
|
(?i:size|large|colou?r)\s+|
|
|
276
282
|
(?i:skeleton)|
|
|
277
|
-
(?i:survey|assessment|station|monitor|stn\.|index|project|
|
|
283
|
+
(?i:survey|assessment|station|monitor|stn\.|index|project|engine|(e|é)x?chang(e|é)s?|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
|
|
278
284
|
(?i:submersible)|
|
|
279
285
|
(?i:synonymy?)|
|
|
280
286
|
(?i:systematic|perspective)|
|
|
281
|
-
^\s*(?i:off|too|the)\s
|
|
287
|
+
^\s*(?i:off|too|the)\s*\b|
|
|
282
288
|
(?i:taxiderm(ies|y))|
|
|
283
289
|
(?i:though)|
|
|
284
290
|
(?i:texas\s+instruments?)\s*?(for)?|
|
|
@@ -380,6 +386,7 @@ module DwcAgent
|
|
|
380
386
|
"von",
|
|
381
387
|
"the",
|
|
382
388
|
"of",
|
|
389
|
+
"van de",
|
|
383
390
|
"van der"
|
|
384
391
|
]
|
|
385
392
|
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwc_agent
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.9.0
|
|
4
|
+
version: 3.0.11.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David P. Shorthouse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-09-
|
|
11
|
+
date: 2023-09-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: namae
|