dwc_agent 1.5.0 → 1.5.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87cfd6b6ab9ee83156f9503e48669691d486f143f64a623f93a40f064d2dc7f7
4
- data.tar.gz: 7f40287aa6bf7e7b90408d72594fd9ab5498401c5ee402da6f2a78163f011a9a
3
+ metadata.gz: 0445f35092b28cfbcc273d01f90eee34c9642d7fc8b9c0b7e32e7720dc2f316b
4
+ data.tar.gz: f10b0b6424007a829196851ece90d6138693dcd1825b6b5712a96b9b9a2ce224
5
5
  SHA512:
6
- metadata.gz: 4cb91ffc7530bd1a93fbbc70c716ad96d49cab139bf95507e3a688566ce997cb71510b13013bb07cb52dffe2ce7419d294392028eedd28f1caba8b032eb98c9a
7
- data.tar.gz: ae226c63106a2b41899b6d4c85c92aa372d8d1cbce8a90023cf9b8d1a4c153a28bd33a9eb8064c492fa79d5d4229ec8979d5ca4f751d97fc1ec09132b0e46ca3
6
+ metadata.gz: 69d6e2be09b6edc55b5ebd0fef931dd5fcaf9df09b150bcd4580a9aace8c24576db65ea194352c134418f974b4cbfc0bf3254a1d4e22132f56203afcc06dbc08
7
+ data.tar.gz: 467f50b23cb2d3cf8ad406b9cd14c3bdbd6635806c46304be7a0708a03755c23db56f0295dd2d89f8325dbc7ac1a3c34b237103974bb27a62ab38446838b21bd
@@ -30,7 +30,7 @@ module DwcAgent
30
30
  return blank_name
31
31
  end
32
32
 
33
- if parsed_namae.given && parsed_namae.given.length > 25
33
+ if parsed_namae.given && parsed_namae.given.length > 35
34
34
  return blank_name
35
35
  end
36
36
 
@@ -85,8 +85,8 @@ module DwcAgent
85
85
  given = parsed_namae.given.strip rescue nil
86
86
  particle = parsed_namae.particle.strip rescue nil
87
87
  appellation = parsed_namae.appellation.strip rescue nil
88
- suffix = parsed_names.suffix.strip rescue nil
89
- title = parsed_names.title.strip rescue nil
88
+ suffix = parsed_namae.suffix.strip rescue nil
89
+ title = parsed_namae.title.strip rescue nil
90
90
 
91
91
  if !given.nil? && given.match(/[A-Z]\.[A-Za-z]{2,}/)
92
92
  given = given.gsub(".", ". ").strip
@@ -126,7 +126,7 @@ module DwcAgent
126
126
  return blank_name
127
127
  end
128
128
 
129
- { title: nil, appellation: nil, given: given, particle: particle, family: family, suffix: nil }
129
+ { title: title, appellation: appellation, given: given, particle: particle, family: family, suffix: suffix }
130
130
  end
131
131
 
132
132
  end
@@ -6,15 +6,16 @@ module DwcAgent
6
6
  \s*?\d+\.\d+|
7
7
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
8
8
  \b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
9
+ \b[,;]?\s*(?i:etal)\.?|
9
10
  \b\s+(bis|ter)(\b|\z)|
10
11
  \bu\.\s*a\.|
11
- \b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
12
+ \b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
12
13
  \b[,;]?\s*(?i:etc)\.?|
13
14
  \b[,;]?\s*(?i:on)\b|
14
15
  \b[,;]?\s*(?i:unkn?own)\b|
15
16
  \b[,;]?\s*(?i:n/a)\b|
16
17
  \b[,;]?\s*(?i:ann?onymous)\b|
17
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?)\)?\b|
18
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?|presumably)\)?\b|
18
19
  \b[,;]?\s*(?i:importer|gift)\:?\b|
19
20
  \b[,;]?\s*(?i:string)\b|
20
21
  \b[,;]?\s*(?i:person\s*string)\b|
@@ -49,7 +50,7 @@ module DwcAgent
49
50
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
50
51
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
51
52
  \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Virginia|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
52
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|China|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Denmark|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|France|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Georgia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Jordan|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Spain|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)|
53
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|China|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Denmark|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|France|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Georgia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Jordan|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Spain|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
53
54
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
54
55
  \b\s*(?i:maybe)\s*\b|
55
56
  \b\s*(?i:prob)\.\s*\b|
@@ -61,7 +62,7 @@ module DwcAgent
61
62
  (?i:see\s+notes?\s*(inside)?)|
62
63
  (?i:see\s+letter\s+enclosed)|
63
64
  (?i:(by)?\s+correspondance)|
64
- (?i:pers\.?\s+comm\.?)|
65
+ (?i:pers\.?\s*comm\.?)|
65
66
  (?i:crossed\s+out)|
66
67
  \(?(?i:source)\(?|
67
68
  (?i:according\s+to)|
@@ -83,11 +84,14 @@ module DwcAgent
83
84
  \b(?i:to\s+(sub)?spp?)\.?|
84
85
  (?i:nom\.?\s+rev\.?)|
85
86
  FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
87
+ \b,?\s*(?i:para|topo|syn)?(?i:type)|
86
88
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
87
89
  (?i:university|museum|exhibits?)|
88
90
  (?i:uqam)|
89
91
  (?i:sem\s+(colec?tor|data))|
90
92
  (?i:no\s+coll\.?(ector)?)|
93
+ (?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
94
+ (?i:non?)\s+(?i:specificato)|
91
95
  \b[,;]\s+\d+\z|
92
96
  ["!@?]|
93
97
  [,]?\d+|
@@ -113,19 +117,22 @@ module DwcAgent
113
117
  [–|ǀ∣|│&+\/;:]|
114
118
  \s+-\s+|
115
119
  \s+a\.\s+|
116
- \b(e|y|i|en|et|or|per|for)\s*\b|
120
+ \b(con|e|y|i|en|et|or|per|for)\s*\b|
117
121
  \b(?i:and|with)\s*\b|
118
122
  \b(?i:annotated(\s+by)?)\s*\b|
119
123
  \b(?i:coll\.)\s*\b|
120
124
  \b(?i:communicate?d(\s+to)?)\s*\b|
121
125
  \b(?i:conf\.?(\s+by)?|confirmed(\s+by)?)\s*\b|
126
+ \b(?i:confirmada)(\s+por)?\s*\b|
122
127
  \b(?i:checked?(\s+by)?)\s*\b|
123
128
  \b(?i:det\.?(\s+by)?)\s*\b|
124
129
  \b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
125
130
  \b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
126
131
  \b(?i:in?dentified(\s+by)?)\s*\b|
127
132
  \b(?i:in\s+part(\s+by)?)\s*\b|
133
+ \b(?i:och)\s*\b|
128
134
  \b(?i:prep\.?\s+(?i:by)?)\s*\b|
135
+ \b(?i:purchased?)(\s+by)?\s*\b|
129
136
  \b(?i:redet\.?(\s+by?)?)\s*\b|
130
137
  \b(?i:reidentified(\s+by)?)\s*\b|
131
138
  \b(?i:stet)\s*\b|
@@ -155,16 +162,24 @@ module DwcAgent
155
162
  '}' => '',
156
163
  '@' => '',
157
164
  '%' => '',
158
- '\\' => ''
165
+ '\\' => '',
166
+ '´' => '\'',
167
+ '+' => ' | '
159
168
  }
160
169
 
161
170
  PHRASE_SUBS = {
162
- 'prof\.' => 'Prof. ',
163
- '\, ph\.d\.' => ' Ph.D.',
164
- '\, bro\.' => ' Bro.',
165
- ' jr\.\,' => ' Jr.;',
166
- ' jr\,' => ' Jr.;',
167
- '\-jr' => ' Jr.'
171
+ ', ph.d.' => ' Ph.D.',
172
+ ', Ph.D.' => ' Ph.D.',
173
+ ', bro.' => ' Bro.',
174
+ ', Jr.,' => ' Jr.;',
175
+ ', Jr.' => ' Jr.',
176
+ ',Jr.' => ' Jr.',
177
+ ', Sr.' => ' Sr.',
178
+ ',Sr.' => ' Sr.',
179
+ ' jr.,' => ' Jr.;',
180
+ ' jr,' => ' Jr.;',
181
+ '-jr' => ' Jr.',
182
+ '-Jr' => ' Jr.'
168
183
  }
169
184
 
170
185
  COMPLEX_SEPARATORS = %r{
@@ -257,6 +272,7 @@ module DwcAgent
257
272
  "determination",
258
273
  "dissection",
259
274
  "entered",
275
+ "indecipherable",
260
276
  "nomenclatural",
261
277
  "orig",
262
278
  "registration",
@@ -268,7 +284,7 @@ module DwcAgent
268
284
  "has not"
269
285
  ]
270
286
 
271
- TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
287
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|major|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
272
288
 
273
289
  APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
274
290
 
@@ -20,7 +20,7 @@ module DwcAgent
20
20
  @strip_out_regex = Regexp.new STRIP_OUT.to_s
21
21
  @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
22
22
  @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
23
- @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.join('|').to_s, Regexp::IGNORECASE
23
+ @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
24
24
  @complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
25
25
  @add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
26
26
  end
@@ -4,7 +4,7 @@ module DwcAgent
4
4
  MAJOR = 1
5
5
  MINOR = 5
6
6
  PATCH = 0
7
- BUILD = nil
7
+ BUILD = 5
8
8
 
9
9
  def self.version
10
10
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.5.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-06 00:00:00.000000000 Z
11
+ date: 2020-08-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae