dwc_agent 1.5.0.3 → 1.5.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: afa4821ae8db9f02c41c67b49ee95ef7d3d4961b3c609892058e023871b6ccf1
4
- data.tar.gz: 235651dde097b5fdc167577c0193243f11d891e4f9e47b7e386b435dbf2e1feb
3
+ metadata.gz: 26a7fa1547beb6246aa87724bc5c2d84180a976f6e5406b021692512c993ab73
4
+ data.tar.gz: f689ae5d4dc3f7cf988327261608695ddc5028f58815f499d5e38b964300b43d
5
5
  SHA512:
6
- metadata.gz: 00fa3c0eac444fa4da032223c792b716fd91915788d94ae38b58dc97eb1c528f76e62091500b77953d505359a62ad40c930ab75bb128a6df21d96587b69eadcc
7
- data.tar.gz: 4bedfcaafeb0782fc3a3e374f8ae783290ba12566a05a1724ba3690b2c683ca354a5b0286070fe801540a06a89c66dc6d3e10166b27e328f384f4922b0481dc6
6
+ metadata.gz: 95f21ea53d2bff84f2f034699df0adb86c1e30756129f8ada445e3cff65894b416586029d527de6b704c69623e568400c2228c882283e7b9f2e5b3645289fbeb
7
+ data.tar.gz: 4c169658b93e82bb4a6a1d5cd3283e9405ee90d987943bfa07d693649bb655ef43bca19afff52cfb86e39e39fa180c5705f686af7b88a1dcc57a8fb655d2481a
@@ -18,10 +18,6 @@ module DwcAgent
18
18
  def clean(parsed_namae)
19
19
  blank_name = { title: nil, appellation: nil, given: nil, particle: nil, family: nil, suffix: nil }
20
20
 
21
- if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
22
- return blank_name
23
- end
24
-
25
21
  if parsed_namae.given && GIVEN_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.given) == 0 }
26
22
  return blank_name
27
23
  end
@@ -79,6 +75,10 @@ module DwcAgent
79
75
  parsed_namae.given = NameCase(parsed_namae.given)
80
76
  end
81
77
 
78
+ if parsed_namae.family && FAMILY_BLACKLIST.any?{ |s| s.casecmp(parsed_namae.family) == 0 }
79
+ return blank_name
80
+ end
81
+
82
82
  parsed_namae.normalize_initials
83
83
 
84
84
  family = parsed_namae.family.gsub(/\.\z/, '').strip rescue nil
@@ -3,18 +3,20 @@ module DwcAgent
3
3
  ^[\[{(]|
4
4
  [\]})]\??$|
5
5
  (?i:acc\s?\#)|
6
+ [,;]?\s*(?i:1st|2nd|3rd|[4-9]th)|
6
7
  \s*?\d+\.\d+|
7
8
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
8
9
  \b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
10
+ \b[,;]?\s*(?i:etal)\.?|
9
11
  \b\s+(bis|ter)(\b|\z)|
10
12
  \bu\.\s*a\.|
11
- \b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
13
+ \b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
12
14
  \b[,;]?\s*(?i:etc)\.?|
13
15
  \b[,;]?\s*(?i:on)\b|
14
16
  \b[,;]?\s*(?i:unkn?own)\b|
15
17
  \b[,;]?\s*(?i:n/a)\b|
16
18
  \b[,;]?\s*(?i:ann?onymous)\b|
17
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?)\)?\b|
19
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?|presumably)\)?\b|
18
20
  \b[,;]?\s*(?i:importer|gift)\:?\b|
19
21
  \b[,;]?\s*(?i:string)\b|
20
22
  \b[,;]?\s*(?i:person\s*string)\b|
@@ -48,34 +50,38 @@ module DwcAgent
48
50
  \d+\s+(?i:Oct|Octob(er|re))\.?\b|
49
51
  \d+\s+(?i:Nov|Novemb(er|re))\.?\b|
50
52
  \d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
51
- \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Virginia|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
52
- \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|China|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Denmark|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|France|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Georgia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Jordan|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Spain|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
53
+ \b[.-–,;:/]?\s*(?i:Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|Evergreen|Florida|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|Ohio|Oklahoma|Oregon|Pennsylvania|Portland|Rhode\s+Island|South\s+Carolina|South\s+Dakota|St\s+Petersburg|Tennessee|Texas|Utah|Vermont|Washington|West\s+Virginia|Wisconsin|Wyoming)\s+(?i:State)\s*\b|
54
+ \b[.,;:/]?\s*?(?i:Afghanistan|Åland Islands|Albania|Algeria|American Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua and Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia \(Plurinational State of\)|Bonaire, Sint Eustatius and Saba|Bosnia and Herzegovina|Botswana|Bouvet Island|Brazil|British Indian Ocean Territory|Brunei Darussalam|Bulgaria|Burkina Faso|Burundi|Cabo Verde|Cambodia|Cameroon|Canada|Cayman Islands|Central African Republic|Chad|Chile|China|Christmas Island|Cocos \(Keeling\) Islands|Colombia|Comoros|Congo|Congo \(Democratic Republic of the\)|Cook Islands|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Curaçao|Cyprus|Czechia|Denmark|Djibouti|Dominica|Dominican Republic|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Falkland Islands \(Malvinas\)|Faroe Islands|Fiji|Finland|France|French Guiana|French Polynesia|French Southern Territories|Gabon|Gambia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenada|Guadeloupe|Guam|Guatemala|Guernsey|Guinea|Guinea-Bissau|Guyana|Haiti|Heard Island and McDonald Islands|Holy See|Honduras|Hong Kong|Hungary|Iceland|India|Indonesia|Iran \(Islamic Republic of\)|Iraq|Ireland|Isle of Man|Israel|Italy|Jamaica|Japan|Jersey|Kazakhstan|Kenya|Kiribati|Korea \(Democratic People\'s Republic of\)|Korea \(Republic of\)|Kuwait|Kyrgyzstan|Lao People\'s Democratic Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macao|Macedonia (the former Yugoslav Republic of)|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia \(Federated States of\)|Moldova \(Republic of\)|Monaco|Mongolia|Montenegro|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands|New Caledonia|New Zealand|Nicaragua|Niger|Nigeria|Niue|Norfolk Island|Northern Mariana Islands|Norway|Oman|Pakistan|Palau|Palestine, State of|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto Rico|Qatar|Réunion|Romania|Russian Federation|Rwanda|Saint Barthélemy|Saint Helena, Ascension and Tristan da Cunha|Saint Kitts and Nevis|Saint Lucia|Saint Martin \(French part\)|Saint Pierre and Miquelon|Saint Vincent and the Grenadines|Samoa|San Marino|Sao Tome and Principe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Sint Maarten \(Dutch part\)|Slovakia|Slovenia|Solomon Islands|Somalia|South Africa|South Georgia and the South Sandwich Islands|South Sudan|Spain|Sri Lanka|Sudan|Suriname|Svalbard and Jan Mayen|Swaziland|Sweden|Switzerland|Syrian Arab Republic|Taiwan|Tajikistan|Tanzania, United Republic of|Thailand|Timor-Leste|Togo|Tokelau|Tonga|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Turks and Caicos Islands|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom of Great Britain and Northern Ireland|United States of America|United States Minor Outlying Islands|Uruguay|Uzbekistan|Vanuatu|Venezuela \(Bolivarian Republic of\)|Viet Nam|Virgin Islands \(British\)|Virgin Islands \(U\.S\.\)|Wallis and Futuna|Western Sahara|Yemen|Zambia|Zimbabwe)\b|
53
55
  (?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
54
56
  \b\s*(?i:maybe)\s*\b|
55
57
  \b\s*(?i:prob)\.\s*\b|
56
- \(?[,]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
58
+ \(?[,]?\s*?(?i:(local)?\s?collectors?|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
57
59
  \b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
58
60
  (?i:Engº|Agrº|Fcº|Drº|Mº|Profº|Dº|Fº)|
59
61
  (?i:fide)\:?\s*\b|
62
+ (?i:first\s+name\s+unknown)|
60
63
  (?i:game\s+dept)\.?\s*\b|
61
64
  (?i:see\s+notes?\s*(inside)?)|
62
65
  (?i:see\s+letter\s+enclosed)|
63
66
  (?i:(by)?\s+correspondance)|
64
- (?i:pers\.?\s+comm\.?)|
67
+ (?i:pers\.?\s*comm\.?)|
65
68
  (?i:crossed\s+out)|
69
+ (?i:(ohne|keine)\s+angaben)|
66
70
  \(?(?i:source)\(?|
67
71
  (?i:according\s+to)|
68
72
  (?i:lanuv)\d+|
73
+ \b\s*name\b|
74
+ \b\s*lost\b|
69
75
  (?i:nswobs)|
70
76
  ORCID|
71
77
  MRI(\s|-)PAS|
72
78
  urn\:qm\.qld\.gov\.au\:collector|
73
79
  (?i:University\s+of\s+(Southern\s+)?California(,\s+Berkeley)?)|
74
- (?i:Field\s+Museum\s+of\s+Natural\s+History)|
75
- (?i:American\s+Museum\s+of\s+Natural\s+History)|
80
+ (?i:field\s+museum\s+of\s+natural\s+history)|
81
+ (?i:american\s+museum\s+of\s+natural\s+history)|
76
82
  (?i:The\s+Paleontological\s+Research\s+Institution)|
77
83
  (?i:museums?\s+victoria)|
78
- \b\s*(?i:United\s+States|Russia)\s*\b|
84
+ \b\s*(?i:united\s+states|russia)\s*\b|
79
85
  (?i:revised|photograph|fruits\s+only)|
80
86
  -?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
81
87
  -?\s*(?i:synonym(y|ie))|
@@ -83,11 +89,14 @@ module DwcAgent
83
89
  \b(?i:to\s+(sub)?spp?)\.?|
84
90
  (?i:nom\.?\s+rev\.?)|
85
91
  FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
92
+ \b,?\s*(?i:para|topo|syn)?(?i:type)|
86
93
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
87
94
  (?i:university|museum|exhibits?)|
88
95
  (?i:uqam)|
89
96
  (?i:sem\s+(colec?tor|data))|
90
97
  (?i:no\s+coll\.?(ector)?)|
98
+ (?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
99
+ (?i:non?)\s+(?i:specificato)|
91
100
  \b[,;]\s+\d+\z|
92
101
  ["!@?]|
93
102
  [,]?\d+|
@@ -113,19 +122,22 @@ module DwcAgent
113
122
  [–|ǀ∣|│&+\/;:]|
114
123
  \s+-\s+|
115
124
  \s+a\.\s+|
116
- \b(e|y|i|en|et|or|per|for)\s*\b|
125
+ \b(con|e|y|i|en|et|or|per|for)\s*\b|
117
126
  \b(?i:and|with)\s*\b|
118
127
  \b(?i:annotated(\s+by)?)\s*\b|
119
128
  \b(?i:coll\.)\s*\b|
120
129
  \b(?i:communicate?d(\s+to)?)\s*\b|
121
130
  \b(?i:conf\.?(\s+by)?|confirmed(\s+by)?)\s*\b|
131
+ \b(?i:confirmada)(\s+por)?\s*\b|
122
132
  \b(?i:checked?(\s+by)?)\s*\b|
123
133
  \b(?i:det\.?(\s+by)?)\s*\b|
124
134
  \b(?i:dupl?\.?(\s+by)?|duplicate(\s+by)?)\s*\b|
125
135
  \b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
126
136
  \b(?i:in?dentified(\s+by)?)\s*\b|
127
137
  \b(?i:in\s+part(\s+by)?)\s*\b|
138
+ \b(?i:och)\s*\b|
128
139
  \b(?i:prep\.?\s+(?i:by)?)\s*\b|
140
+ \b(?i:purchased?)(\s+by)?\s*\b|
129
141
  \b(?i:redet\.?(\s+by?)?)\s*\b|
130
142
  \b(?i:reidentified(\s+by)?)\s*\b|
131
143
  \b(?i:stet)\s*\b|
@@ -155,16 +167,24 @@ module DwcAgent
155
167
  '}' => '',
156
168
  '@' => '',
157
169
  '%' => '',
158
- '\\' => ''
170
+ '\\' => '',
171
+ '´' => '\'',
172
+ '+' => ' | '
159
173
  }
160
174
 
161
175
  PHRASE_SUBS = {
162
- 'prof\.' => 'Prof. ',
163
- '\, ph\.d\.' => ' Ph.D.',
164
- '\, bro\.' => ' Bro.',
165
- ' jr\.\,' => ' Jr.;',
166
- ' jr\,' => ' Jr.;',
167
- '\-jr' => ' Jr.'
176
+ ', ph.d.' => ' Ph.D.',
177
+ ', Ph.D.' => ' Ph.D.',
178
+ ', bro.' => ' Bro.',
179
+ ', Jr.,' => ' Jr.;',
180
+ ', Jr.' => ' Jr.',
181
+ ',Jr.' => ' Jr.',
182
+ ', Sr.' => ' Sr.',
183
+ ',Sr.' => ' Sr.',
184
+ ' jr.,' => ' Jr.;',
185
+ ' jr,' => ' Jr.;',
186
+ '-jr' => ' Jr.',
187
+ '-Jr' => ' Jr.'
168
188
  }
169
189
 
170
190
  COMPLEX_SEPARATORS = %r{
@@ -229,7 +249,7 @@ module DwcAgent
229
249
  (?i:though)|
230
250
  (?i:texas\s+instruments?)\s*?(for)?|
231
251
  (?:tropical)|
232
- (?i:toward|seen at)|
252
+ (?i:toward|seen\s+at)|
233
253
  (?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
234
254
  (?i:urn\:)|
235
255
  (?i:usda|ucla)|
@@ -238,29 +258,47 @@ module DwcAgent
238
258
  }x
239
259
 
240
260
  FAMILY_BLACKLIST = [
261
+ "ap",
241
262
  "da",
263
+ "de",
242
264
  "de'",
243
265
  "del",
244
266
  "der",
267
+ "di",
268
+ "do",
269
+ "dos",
245
270
  "du",
246
271
  "el",
272
+ "le",
273
+ "la",
247
274
  "van",
248
275
  "von",
249
276
  "the",
250
277
  "of",
251
278
  "adjustment",
279
+ "annotator",
252
280
  "available",
253
281
  "arachnology",
254
282
  "catalogue",
255
283
  "curators",
256
284
  "data",
285
+ "details",
286
+ "determiner",
257
287
  "determination",
258
288
  "dissection",
259
289
  "entered",
290
+ "erased",
291
+ "indecipherable",
260
292
  "nomenclatural",
261
293
  "orig",
262
294
  "registration",
263
- "science"
295
+ "science",
296
+ "wg",
297
+ "wm",
298
+ "wn",
299
+ "zw",
300
+ "zz",
301
+ "z-"
264
302
  ]
265
303
 
266
304
  GIVEN_BLACKLIST = [
@@ -268,7 +306,7 @@ module DwcAgent
268
306
  "has not"
269
307
  ]
270
308
 
271
- TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
309
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|major|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
272
310
 
273
311
  APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
274
312
 
@@ -20,7 +20,7 @@ module DwcAgent
20
20
  @strip_out_regex = Regexp.new STRIP_OUT.to_s
21
21
  @residual_terminators_regex = Regexp.new SPLIT_BY.to_s + %r{\s*\z}.to_s
22
22
  @char_subs_regex = Regexp.new [CHAR_SUBS.keys.join].to_s
23
- @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.join('|').to_s, Regexp::IGNORECASE
23
+ @phrase_subs_regex = Regexp.new PHRASE_SUBS.keys.map{|a| Regexp.escape a }.join('|').to_s
24
24
  @complex_separators_regex = Regexp.new COMPLEX_SEPARATORS.to_s
25
25
  @add_separators_regex = Regexp.new %r{(\S{1}\.)([[:alpha:]]{2,})}.to_s
26
26
  end
@@ -4,7 +4,7 @@ module DwcAgent
4
4
  MAJOR = 1
5
5
  MINOR = 5
6
6
  PATCH = 0
7
- BUILD = 3
7
+ BUILD = 8
8
8
 
9
9
  def self.version
10
10
  [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0.3
4
+ version: 1.5.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-09 00:00:00.000000000 Z
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae
@@ -102,7 +102,7 @@ homepage: https://github.com/bionomia/dwc_agent
102
102
  licenses:
103
103
  - MIT
104
104
  metadata: {}
105
- post_install_message:
105
+ post_install_message:
106
106
  rdoc_options:
107
107
  - "--encoding"
108
108
  - UTF-8
@@ -119,8 +119,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
119
  - !ruby/object:Gem::Version
120
120
  version: '0'
121
121
  requirements: []
122
- rubygems_version: 3.0.6
123
- signing_key:
122
+ rubygems_version: 3.1.2
123
+ signing_key:
124
124
  specification_version: 4
125
125
  summary: Parse Darwin Core agent terms such as recordedBy and identifiedBy
126
126
  test_files: []