dwc_agent 1.5.0.4 → 1.5.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +17 -10
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0445f35092b28cfbcc273d01f90eee34c9642d7fc8b9c0b7e32e7720dc2f316b
|
|
4
|
+
data.tar.gz: f10b0b6424007a829196851ece90d6138693dcd1825b6b5712a96b9b9a2ce224
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 69d6e2be09b6edc55b5ebd0fef931dd5fcaf9df09b150bcd4580a9aace8c24576db65ea194352c134418f974b4cbfc0bf3254a1d4e22132f56203afcc06dbc08
|
|
7
|
+
data.tar.gz: 467f50b23cb2d3cf8ad406b9cd14c3bdbd6635806c46304be7a0708a03755c23db56f0295dd2d89f8325dbc7ac1a3c34b237103974bb27a62ab38446838b21bd
|
data/lib/dwc_agent/constants.rb
CHANGED
|
@@ -6,15 +6,16 @@ module DwcAgent
|
|
|
6
6
|
\s*?\d+\.\d+|
|
|
7
7
|
\b\d+\(?(?i:[[:alpha:]])\)?\b|
|
|
8
8
|
\b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
|
|
9
|
+
\b[,;]?\s*(?i:etal)\.?|
|
|
9
10
|
\b\s+(bis|ter)(\b|\z)|
|
|
10
11
|
\bu\.\s*a\.|
|
|
11
|
-
\b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
|
|
12
|
+
\b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
|
|
12
13
|
\b[,;]?\s*(?i:etc)\.?|
|
|
13
14
|
\b[,;]?\s*(?i:on)\b|
|
|
14
15
|
\b[,;]?\s*(?i:unkn?own)\b|
|
|
15
16
|
\b[,;]?\s*(?i:n/a)\b|
|
|
16
17
|
\b[,;]?\s*(?i:ann?onymous)\b|
|
|
17
|
-
\b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed
|
|
18
|
+
\b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?|presumably)\)?\b|
|
|
18
19
|
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
|
19
20
|
\b[,;]?\s*(?i:string)\b|
|
|
20
21
|
\b[,;]?\s*(?i:person\s*string)\b|
|
|
@@ -83,11 +84,14 @@ module DwcAgent
|
|
|
83
84
|
\b(?i:to\s+(sub)?spp?)\.?|
|
|
84
85
|
(?i:nom\.?\s+rev\.?)|
|
|
85
86
|
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
|
|
87
|
+
\b,?\s*(?i:para|topo|syn)?(?i:type)|
|
|
86
88
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
|
87
89
|
(?i:university|museum|exhibits?)|
|
|
88
90
|
(?i:uqam)|
|
|
89
91
|
(?i:sem\s+(colec?tor|data))|
|
|
90
92
|
(?i:no\s+coll\.?(ector)?)|
|
|
93
|
+
(?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
|
|
94
|
+
(?i:non?)\s+(?i:specificato)|
|
|
91
95
|
\b[,;]\s+\d+\z|
|
|
92
96
|
["!@?]|
|
|
93
97
|
[,]?\d+|
|
|
@@ -158,22 +162,24 @@ module DwcAgent
|
|
|
158
162
|
'}' => '',
|
|
159
163
|
'@' => '',
|
|
160
164
|
'%' => '',
|
|
161
|
-
'\\' => ''
|
|
165
|
+
'\\' => '',
|
|
166
|
+
'´' => '\'',
|
|
167
|
+
'+' => ' | '
|
|
162
168
|
}
|
|
163
169
|
|
|
164
170
|
PHRASE_SUBS = {
|
|
165
171
|
', ph.d.' => ' Ph.D.',
|
|
166
172
|
', Ph.D.' => ' Ph.D.',
|
|
167
173
|
', bro.' => ' Bro.',
|
|
168
|
-
'
|
|
169
|
-
' jr,' => ' Jr.;',
|
|
170
|
-
'-jr' => ' Jr.',
|
|
171
|
-
'-Jr' => ' Jr.',
|
|
174
|
+
', Jr.,' => ' Jr.;',
|
|
172
175
|
', Jr.' => ' Jr.',
|
|
173
176
|
',Jr.' => ' Jr.',
|
|
174
177
|
', Sr.' => ' Sr.',
|
|
175
|
-
',Sr.' => ' Sr.'
|
|
176
|
-
|
|
178
|
+
',Sr.' => ' Sr.',
|
|
179
|
+
' jr.,' => ' Jr.;',
|
|
180
|
+
' jr,' => ' Jr.;',
|
|
181
|
+
'-jr' => ' Jr.',
|
|
182
|
+
'-Jr' => ' Jr.'
|
|
177
183
|
}
|
|
178
184
|
|
|
179
185
|
COMPLEX_SEPARATORS = %r{
|
|
@@ -266,6 +272,7 @@ module DwcAgent
|
|
|
266
272
|
"determination",
|
|
267
273
|
"dissection",
|
|
268
274
|
"entered",
|
|
275
|
+
"indecipherable",
|
|
269
276
|
"nomenclatural",
|
|
270
277
|
"orig",
|
|
271
278
|
"registration",
|
|
@@ -277,7 +284,7 @@ module DwcAgent
|
|
|
277
284
|
"has not"
|
|
278
285
|
]
|
|
279
286
|
|
|
280
|
-
TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|
|
|
287
|
+
TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|major|capt|cmdr|lt|sgt|cpl|pvt|proff?|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
|
|
281
288
|
|
|
282
289
|
APPELLATION = /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
|
283
290
|
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwc_agent
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.5.0.4
|
|
4
|
+
version: 1.5.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David P. Shorthouse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-08-
|
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: namae
|