dwc_agent 1.4.7 → 1.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2975bee8cb8675fa7b6e50bc45f90d4d21855a2a22fe72999220b80af4e9d54d
4
- data.tar.gz: 06b764667fe3235983492f2182eb2dc90d4ad768382e1f1f5eb7384144180a86
3
+ metadata.gz: f0eca9af87fc999b9d1e2313ea88df6bee1aec54a080e46fc8245abd3c41d615
4
+ data.tar.gz: 38f98d4fc7b30a2f80521d7d064666f5da9f64889af7f08b15a852619d1ab3be
5
5
  SHA512:
6
- metadata.gz: 7d026fd7ffc15101bd5f994263c1950115fee1d5ee9a5af89f46ae1bfd5aa7e761216b77dea5a4b225503ed11ae889bc8853aafab98d700532b60723cfeefec1
7
- data.tar.gz: fd2d7986eb1ea1456800f03c6bf78b31ef33e50e743e1c316b873334fa16767e23afa2c463bf3445e23573620b86746ea61e67c890b16d6b8b2673e01f8a6a84
6
+ metadata.gz: 43dafd9ab3d3cf8513a579614d56429551803f15b39a814f25076d09d023f335c74fb8d22aaed49bbc2318587963972d74f035951221d5b12057fc7a0691ae57
7
+ data.tar.gz: 35d99c98fd8d7334e5cdac7bf781f0ad85adb6fd0684ffa6c9c058264ade10d2a6b0b544ceec4c52ca64192833c3edcb896ed29e8f45c4df970680b6db719eaa
@@ -14,7 +14,7 @@ module DwcAgent
14
14
  \b[,;]?\s*(?i:unkn?own)\b|
15
15
  \b[,;]?\s*(?i:n/a)\b|
16
16
  \b[,;]?\s*(?i:ann?onymous)\b|
17
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit)\)?\b|
17
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?)\)?\b|
18
18
  \b[,;]?\s*(?i:importer|gift)\:?\b|
19
19
  \b[,;]?\s*(?i:string)\b|
20
20
  \b[,;]?\s*(?i:person\s*string)\b|
@@ -49,11 +49,13 @@ module DwcAgent
49
49
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
50
50
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
51
51
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Virginia|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
52
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|China|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Denmark|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|France|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Georgia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Jordan|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Spain|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)|
52
53
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
53
54
  \b\s*(?i:maybe)\s*\b|
54
55
  \b\s*(?i:prob)\.\s*\b|
55
56
  \(?[,]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
56
57
  \b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
58
+ (?i:Engº|Agrº|Fcº|Drº|Mº|Profº|Dº|Fº)|
57
59
  (?i:fide)\:?\s*\b|
58
60
  (?i:game\s+dept)\.?\s*\b|
59
61
  (?i:see\s+notes?\s*(inside)?)|
@@ -80,7 +82,7 @@ module DwcAgent
80
82
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
81
83
  \b(?i:to\s+(sub)?spp?)\.?|
82
84
  (?i:nom\.?\s+rev\.?)|
83
- FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|
85
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
84
86
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
85
87
  (?i:university|museum|exhibits?)|
86
88
  (?i:uqam)|
@@ -108,7 +110,7 @@ module DwcAgent
108
110
  }x
109
111
 
110
112
  SPLIT_BY = %r{
111
- [–|&+/;:]|
113
+ [–|ǀ∣|│&+\/;:]|
112
114
  \s+-\s+|
113
115
  \s+a\.\s+|
114
116
  \b(e|y|i|en|et|or|per|for)\s*\b|
@@ -135,6 +137,9 @@ module DwcAgent
135
137
 
136
138
  CHAR_SUBS = {
137
139
  '|' => ' | ',
140
+ 'ǀ' => ' | ',
141
+ '∣' => ' | ',
142
+ '│' => ' | ',
138
143
  '(' => ' ',
139
144
  ')' => ' ',
140
145
  '?' => '',
@@ -157,9 +162,13 @@ module DwcAgent
157
162
  'dr\.' => 'Dr. ',
158
163
  'mr\.' => 'Mr. ',
159
164
  'mrs\.' => 'Mrs. ',
165
+ 'ms\.' => 'Ms. ',
160
166
  'prof\.' => 'Prof. ',
161
167
  '\, ph\.d\.' => ' Ph.D.',
162
- '\, bro\.' => ' Bro.'
168
+ '\, bro\.' => ' Bro.',
169
+ ' jr\.,' => ' Jr.;',
170
+ ' jr,' => ' Jr.;',
171
+ '-Jr' => ' Jr.'
163
172
  }
164
173
 
165
174
  COMPLEX_SEPARATORS = %r{
@@ -198,7 +207,7 @@ module DwcAgent
198
207
  (?i:geographic)|
199
208
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
200
209
  (?i:univ\.)|
201
- (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
210
+ (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
202
211
  (?i:non\s+pr(é|e)cis(é|e))|
203
212
  (?i:no\s+consta)|
204
213
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
@@ -225,7 +234,7 @@ module DwcAgent
225
234
  (?i:texas\s+instruments?)\s*?(for)?|
226
235
  (?:tropical)|
227
236
  (?i:toward|seen at)|
228
- (?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
237
+ (?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
229
238
  (?i:urn\:)|
230
239
  (?i:usda|ucla)|
231
240
  (?i:workshop|garden|farm|jardin|public)|
@@ -263,6 +272,6 @@ module DwcAgent
263
272
  "has not"
264
273
  ]
265
274
 
266
- TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
275
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
267
276
 
268
277
  end
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 1
5
5
  MINOR = 4
6
- PATCH = 7
6
+ PATCH = 12
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.7
4
+ version: 1.4.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-18 00:00:00.000000000 Z
11
+ date: 2020-08-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae
@@ -98,7 +98,7 @@ files:
98
98
  - lib/dwc_agent/similarity.rb
99
99
  - lib/dwc_agent/utility.rb
100
100
  - lib/dwc_agent/version.rb
101
- homepage: https://github.com/dshorthouse/dwc_agent
101
+ homepage: https://github.com/bionomia/dwc_agent
102
102
  licenses:
103
103
  - MIT
104
104
  metadata: {}