dwc_agent 1.4.5 → 1.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +14 -7
- data/lib/dwc_agent/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9776505a303866cf6fb7ffed89c26f0d654c09bc89c0546de763084c55d483d
|
4
|
+
data.tar.gz: 1552777d4f2b4b8f6a72052a817dd7396e384f7b3f42b1dab1c4db7802dbfee7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed8284bf695e107e8aa19af434376d7db3ad96235e1b5fc0f394a321edf527dd911ac9daa768630d4719e3ddcef0168077aa3c0a0d98180b29fc89cf116b4184
|
7
|
+
data.tar.gz: 67498c5b597e6ef3619a90e1b336a30c2840c33d71bb0c7a38b06a9db99d58e3caadcde0c292d65b88146dcd2a17be6ac4e8f5b77916ac578083e0ea841fb498
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -2,9 +2,10 @@ module DwcAgent
|
|
2
2
|
STRIP_OUT = %r{
|
3
3
|
^[\[{(]|
|
4
4
|
[\]})]\??$|
|
5
|
+
(?i:acc\s?\#)|
|
5
6
|
\s*?\d+\.\d+|
|
6
7
|
\b\d+\(?(?i:[[:alpha:]])\)?\b|
|
7
|
-
\b[,;]?\s*(?i:et\.?\s+al)\.?|
|
8
|
+
\b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
|
8
9
|
\b\s+(bis|ter)(\b|\z)|
|
9
10
|
\bu\.\s*a\.|
|
10
11
|
\b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
|
@@ -67,11 +68,11 @@ module DwcAgent
|
|
67
68
|
ORCID|
|
68
69
|
MRI(\s|-)PAS|
|
69
70
|
urn\:qm\.qld\.gov\.au\:collector|
|
70
|
-
(?i:University\s+of\s+California
|
71
|
+
(?i:University\s+of\s+(Southern\s+)?California(,\s+Berkeley)?)|
|
71
72
|
(?i:Field\s+Museum\s+of\s+Natural\s+History)|
|
72
73
|
(?i:American\s+Museum\s+of\s+Natural\s+History)|
|
73
74
|
(?i:The\s+Paleontological\s+Research\s+Institution)|
|
74
|
-
(?i:
|
75
|
+
(?i:museums?\s+victoria)|
|
75
76
|
\b\s*(?i:United\s+States|Russia)\s*\b|
|
76
77
|
(?i:revised|photograph|fruits\s+only)|
|
77
78
|
-?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
|
@@ -79,7 +80,7 @@ module DwcAgent
|
|
79
80
|
\b\s*\(?(?i:(fe)?male)\)?\s*\b|
|
80
81
|
\b(?i:to\s+(sub)?spp?)\.?|
|
81
82
|
(?i:nom\.?\s+rev\.?)|
|
82
|
-
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|
|
83
|
+
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
|
83
84
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
84
85
|
(?i:university|museum|exhibits?)|
|
85
86
|
(?i:uqam)|
|
@@ -107,7 +108,7 @@ module DwcAgent
|
|
107
108
|
}x
|
108
109
|
|
109
110
|
SPLIT_BY = %r{
|
110
|
-
[
|
111
|
+
[–|ǀ∣|│&+\/;:]|
|
111
112
|
\s+-\s+|
|
112
113
|
\s+a\.\s+|
|
113
114
|
\b(e|y|i|en|et|or|per|for)\s*\b|
|
@@ -134,6 +135,9 @@ module DwcAgent
|
|
134
135
|
|
135
136
|
CHAR_SUBS = {
|
136
137
|
'|' => ' | ',
|
138
|
+
'ǀ' => ' | ',
|
139
|
+
'∣' => ' | ',
|
140
|
+
'│' => ' | ',
|
137
141
|
'(' => ' ',
|
138
142
|
')' => ' ',
|
139
143
|
'?' => '',
|
@@ -174,6 +178,7 @@ module DwcAgent
|
|
174
178
|
(?i:average)|
|
175
179
|
(?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
|
176
180
|
(?i:barcod)|
|
181
|
+
(?i:BgWd)|
|
177
182
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
178
183
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
179
184
|
(?i:carex|salix)|
|
@@ -196,8 +201,9 @@ module DwcAgent
|
|
196
201
|
(?i:geographic)|
|
197
202
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
198
203
|
(?i:univ\.)|
|
199
|
-
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
204
|
+
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
|
200
205
|
(?i:non\s+pr(é|e)cis(é|e))|
|
206
|
+
(?i:no\s+consta)|
|
201
207
|
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
202
208
|
(?i:not?\s+(entered|stated))|
|
203
209
|
(?i:nomenclatur(e|al)\s+adjustment)|
|
@@ -222,7 +228,7 @@ module DwcAgent
|
|
222
228
|
(?i:texas\s+instruments?)\s*?(for)?|
|
223
229
|
(?:tropical)|
|
224
230
|
(?i:toward|seen at)|
|
225
|
-
(?i:unidentified|unspecified|unk?nown
|
231
|
+
(?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
|
226
232
|
(?i:urn\:)|
|
227
233
|
(?i:usda|ucla)|
|
228
234
|
(?i:workshop|garden|farm|jardin|public)|
|
@@ -242,6 +248,7 @@ module DwcAgent
|
|
242
248
|
"of",
|
243
249
|
"adjustment",
|
244
250
|
"available",
|
251
|
+
"arachnology",
|
245
252
|
"catalogue",
|
246
253
|
"curators",
|
247
254
|
"data",
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.5
|
4
|
+
version: 1.4.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|
@@ -98,7 +98,7 @@ files:
|
|
98
98
|
- lib/dwc_agent/similarity.rb
|
99
99
|
- lib/dwc_agent/utility.rb
|
100
100
|
- lib/dwc_agent/version.rb
|
101
|
-
homepage: https://github.com/
|
101
|
+
homepage: https://github.com/bionomia/dwc_agent
|
102
102
|
licenses:
|
103
103
|
- MIT
|
104
104
|
metadata: {}
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.0.
|
122
|
+
rubygems_version: 3.0.6
|
123
123
|
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Parse Darwin Core agent terms such as recordedBy and identifiedBy
|