dwc_agent 1.4.6 → 1.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1cf058ee8ca5956a4d05ca0122d5ee9a7497b304576bd4af92fd0f0597846a08
4
- data.tar.gz: 3ef7bafb6a3bd9b9af826f161448b259739ec791b4158e5704f6d4da98af48fe
3
+ metadata.gz: b7f6e68d702ce0526d2e03c6299cf6b4683a85c0539aa1637b1d8d12e2e0bc05
4
+ data.tar.gz: c47fa534fec2f64c0b308971291a8c5d7fb637cb8487d782bbc8523ed9e5efa2
5
5
  SHA512:
6
- metadata.gz: 0be4b1c78b8fbf61c1a5363ce32efa780372b394bf88e9fd5a5fc42be77eea564d865637feaca08b63712d8780c515db2408e01967519fc014a3b6f5468793c9
7
- data.tar.gz: 37095ef2920f06e6152aa58aab26034ce5b00d15778a8ae3270c7672c7e53ba2853223f7f67bfbfc5953d06050b8f2a8c8a2a1f0c3e41ba002aa18411a57092c
6
+ metadata.gz: 782189bcf8e90a397ebcebb1c176bec521ef9df482e25cfdcd8084612ff45c3a644771088b58741317fd31b65241908045189988fe497e459a7ef66f12de54ca
7
+ data.tar.gz: 3be7a71b49471a2c7172ccc04d81947caf280f2d5621dc634fb7cc45ba853d2395126613d4fd5f3800251d31ce2bd6627fb1820de9cad5e76f34730522a2daf4
@@ -2,9 +2,10 @@ module DwcAgent
2
2
  STRIP_OUT = %r{
3
3
  ^[\[{(]|
4
4
  [\]})]\??$|
5
+ (?i:acc\s?\#)|
5
6
  \s*?\d+\.\d+|
6
7
  \b\d+\(?(?i:[[:alpha:]])\)?\b|
7
- \b[,;]?\s*(?i:et\.?\s+al)\.?|
8
+ \b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
8
9
  \b\s+(bis|ter)(\b|\z)|
9
10
  \bu\.\s*a\.|
10
11
  \b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
@@ -13,7 +14,7 @@ module DwcAgent
13
14
  \b[,;]?\s*(?i:unkn?own)\b|
14
15
  \b[,;]?\s*(?i:n/a)\b|
15
16
  \b[,;]?\s*(?i:ann?onymous)\b|
16
- \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit)\)?\b|
17
+ \b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?)\)?\b|
17
18
  \b[,;]?\s*(?i:importer|gift)\:?\b|
18
19
  \b[,;]?\s*(?i:string)\b|
19
20
  \b[,;]?\s*(?i:person\s*string)\b|
@@ -53,6 +54,7 @@ module DwcAgent
53
54
  \b\s*(?i:prob)\.\s*\b|
54
55
  \(?[,]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
55
56
  \b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
57
+ (?i:Engº|Agrº|Fcº|Drº|Mº|Profº|Dº|Fº)|
56
58
  (?i:fide)\:?\s*\b|
57
59
  (?i:game\s+dept)\.?\s*\b|
58
60
  (?i:see\s+notes?\s*(inside)?)|
@@ -71,7 +73,7 @@ module DwcAgent
71
73
  (?i:Field\s+Museum\s+of\s+Natural\s+History)|
72
74
  (?i:American\s+Museum\s+of\s+Natural\s+History)|
73
75
  (?i:The\s+Paleontological\s+Research\s+Institution)|
74
- (?i:museum\s+victoria)|
76
+ (?i:museums?\s+victoria)|
75
77
  \b\s*(?i:United\s+States|Russia)\s*\b|
76
78
  (?i:revised|photograph|fruits\s+only)|
77
79
  -?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
@@ -79,7 +81,7 @@ module DwcAgent
79
81
  \b\s*\(?(?i:(fe)?male)\)?\s*\b|
80
82
  \b(?i:to\s+(sub)?spp?)\.?|
81
83
  (?i:nom\.?\s+rev\.?)|
82
- FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|
84
+ FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
83
85
  AFSC\/POLISH\s+SORTING\s+CTR\.?|
84
86
  (?i:university|museum|exhibits?)|
85
87
  (?i:uqam)|
@@ -107,7 +109,7 @@ module DwcAgent
107
109
  }x
108
110
 
109
111
  SPLIT_BY = %r{
110
- [–|&+/;:]|
112
+ [–|ǀ∣|│&+\/;:]|
111
113
  \s+-\s+|
112
114
  \s+a\.\s+|
113
115
  \b(e|y|i|en|et|or|per|for)\s*\b|
@@ -134,6 +136,9 @@ module DwcAgent
134
136
 
135
137
  CHAR_SUBS = {
136
138
  '|' => ' | ',
139
+ 'ǀ' => ' | ',
140
+ '∣' => ' | ',
141
+ '│' => ' | ',
137
142
  '(' => ' ',
138
143
  ')' => ' ',
139
144
  '?' => '',
@@ -156,9 +161,13 @@ module DwcAgent
156
161
  'dr\.' => 'Dr. ',
157
162
  'mr\.' => 'Mr. ',
158
163
  'mrs\.' => 'Mrs. ',
164
+ 'ms\.' => 'Ms. ',
159
165
  'prof\.' => 'Prof. ',
160
166
  '\, ph\.d\.' => ' Ph.D.',
161
- '\, bro\.' => ' Bro.'
167
+ '\, bro\.' => ' Bro.',
168
+ ' jr\.,' => ' Jr.;',
169
+ ' jr,' => ' Jr.;',
170
+ '-Jr' => ' Jr.'
162
171
  }
163
172
 
164
173
  COMPLEX_SEPARATORS = %r{
@@ -174,6 +183,7 @@ module DwcAgent
174
183
  (?i:average)|
175
184
  (?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
176
185
  (?i:barcod)|
186
+ (?i:BgWd)|
177
187
  (?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
178
188
  (?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
179
189
  (?i:carex|salix)|
@@ -196,7 +206,7 @@ module DwcAgent
196
206
  (?i:geographic)|
197
207
  (?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
198
208
  (?i:univ\.)|
199
- (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
209
+ (?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
200
210
  (?i:non\s+pr(é|e)cis(é|e))|
201
211
  (?i:no\s+consta)|
202
212
  (?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
@@ -223,7 +233,7 @@ module DwcAgent
223
233
  (?i:texas\s+instruments?)\s*?(for)?|
224
234
  (?:tropical)|
225
235
  (?i:toward|seen at)|
226
- (?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
236
+ (?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
227
237
  (?i:urn\:)|
228
238
  (?i:usda|ucla)|
229
239
  (?i:workshop|garden|farm|jardin|public)|
@@ -261,6 +271,6 @@ module DwcAgent
261
271
  "has not"
262
272
  ]
263
273
 
264
- TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
274
+ TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
265
275
 
266
276
  end
@@ -3,7 +3,7 @@ module DwcAgent
3
3
 
4
4
  MAJOR = 1
5
5
  MINOR = 4
6
- PATCH = 6
6
+ PATCH = 11
7
7
  BUILD = nil
8
8
 
9
9
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.6
4
+ version: 1.4.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-11 00:00:00.000000000 Z
11
+ date: 2020-07-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: namae
@@ -98,7 +98,7 @@ files:
98
98
  - lib/dwc_agent/similarity.rb
99
99
  - lib/dwc_agent/utility.rb
100
100
  - lib/dwc_agent/version.rb
101
- homepage: https://github.com/dshorthouse/dwc_agent
101
+ homepage: https://github.com/bionomia/dwc_agent
102
102
  licenses:
103
103
  - MIT
104
104
  metadata: {}