dwc_agent 3.0.9.0 → 3.0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +6 -3
- data/lib/dwc_agent/version.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf7946475612999b141445b40ee1b93c1252d3f987b65eab8523238716ab162e
|
4
|
+
data.tar.gz: '09d2c2353fc63d86bdb327cfc8cd29e33be945d2eb4a6d147c55e31e30c80c14'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ce194f8823af1f1a594a43a0f37eb2e08ea51dbec107fb3c7d19dceb04e6579efe30d7a5bbaafbf964f1110debd8b4a8bb2abe53a55c9f99965da0a6cc32f8d3
|
7
|
+
data.tar.gz: 7f6b7b0fe8c23cd1e0bbef6eefa1488300867f7acbb8b039bef65f0c597011a85aad7f926652ce99993a0fa6bbebbc4eef99f73769031714e4f5e0002d5c6737
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -22,8 +22,9 @@ module DwcAgent
|
|
22
22
|
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
23
23
|
\b[,;]?\s*(?i:string)\b|
|
24
24
|
\b[,;]?\s*(?i:person\s*string)\b|
|
25
|
+
^(?i:colln?)\.?\s+|\s*(?i:colln?)\.?\s*$|
|
26
|
+
^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
|
25
27
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
26
|
-
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
27
28
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
28
29
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
29
30
|
May|Jun|Jul|Aug|Sept?|
|
@@ -53,7 +54,7 @@ module DwcAgent
|
|
53
54
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
54
55
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
55
56
|
\b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
|
56
|
-
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|
|
57
|
+
\b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Isle of Man|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Russia|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
|
57
58
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
58
59
|
\b\s*(?i:maybe)\s*\b|
|
59
60
|
\b\s*(?i:prob)\.\s*\b|
|
@@ -149,6 +150,7 @@ module DwcAgent
|
|
149
150
|
\b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
|
150
151
|
\b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
|
151
152
|
\b(?i:in?dentified(\s+by)?)\s*\b|
|
153
|
+
\b(?i:in\s+coll\.?\s*\b)|
|
152
154
|
\b(?i:in\s+part(\s+by)?)\s*\b|
|
153
155
|
\b(?i:och)\s*\b|
|
154
156
|
\b(?i:prep\.?\s+(?i:by)?)\s*\b|
|
@@ -162,7 +164,7 @@ module DwcAgent
|
|
162
164
|
}x
|
163
165
|
|
164
166
|
POST_STRIP_TIDY = %r{
|
165
|
-
^\s*[
|
167
|
+
^\s*[&,;.]\s*
|
166
168
|
}x
|
167
169
|
|
168
170
|
CHAR_SUBS = {
|
@@ -210,6 +212,7 @@ module DwcAgent
|
|
210
212
|
|
211
213
|
SEPARATORS = {
|
212
214
|
"^(\\S{4,}),\\s+(Mrs?\\.|MRS?\\.)\\s+([A-Za-z\\.\\s]{1,})$" => "\\2 \\3 \\1",
|
215
|
+
"^(Mrs?\\.?)\\s+&\\s+(Mrs?\\.?)\\s+(.*)$" => "\\1 \\3 | \\2 \\3",
|
213
216
|
"^([A-Z]{1}\\.\\s*[[:alpha:]]{1,}),\\s*?([A-Z.]{1,})$" => "\\1 \\2",
|
214
217
|
"^(\\S{4,},\\s+(?:\\S\\.\\s*){1,})\\s+(\\S{4,},\\s+(?:\\S\.\\s*){1,})$" => "\\1 | \\2",
|
215
218
|
"(\\S{1}\\.)([[:alpha:]]{2,})" => "\\1 \\2",
|
data/lib/dwc_agent/version.rb
CHANGED