dwc_agent 1.4.6 → 1.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +19 -9
- data/lib/dwc_agent/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7f6e68d702ce0526d2e03c6299cf6b4683a85c0539aa1637b1d8d12e2e0bc05
|
4
|
+
data.tar.gz: c47fa534fec2f64c0b308971291a8c5d7fb637cb8487d782bbc8523ed9e5efa2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 782189bcf8e90a397ebcebb1c176bec521ef9df482e25cfdcd8084612ff45c3a644771088b58741317fd31b65241908045189988fe497e459a7ef66f12de54ca
|
7
|
+
data.tar.gz: 3be7a71b49471a2c7172ccc04d81947caf280f2d5621dc634fb7cc45ba853d2395126613d4fd5f3800251d31ce2bd6627fb1820de9cad5e76f34730522a2daf4
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -2,9 +2,10 @@ module DwcAgent
|
|
2
2
|
STRIP_OUT = %r{
|
3
3
|
^[\[{(]|
|
4
4
|
[\]})]\??$|
|
5
|
+
(?i:acc\s?\#)|
|
5
6
|
\s*?\d+\.\d+|
|
6
7
|
\b\d+\(?(?i:[[:alpha:]])\)?\b|
|
7
|
-
\b[,;]?\s*(?i:et\.?\s+al)\.?|
|
8
|
+
\b[,;]?\s*(?i:et\.?\s+al|&\s+al)\.?|
|
8
9
|
\b\s+(bis|ter)(\b|\z)|
|
9
10
|
\bu\.\s*a\.|
|
10
11
|
\b[,;]?\s*(?i:and|&)?\s*(?i:others)\s*\b|
|
@@ -13,7 +14,7 @@ module DwcAgent
|
|
13
14
|
\b[,;]?\s*(?i:unkn?own)\b|
|
14
15
|
\b[,;]?\s*(?i:n/a)\b|
|
15
16
|
\b[,;]?\s*(?i:ann?onymous)\b|
|
16
|
-
\b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit)\)?\b|
|
17
|
+
\b[,;]?\s*\(?(?i:undetermined|indeterminable|dummy|interim|accession|ill(eg|is)ible|scripsit|presumed?)\)?\b|
|
17
18
|
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
18
19
|
\b[,;]?\s*(?i:string)\b|
|
19
20
|
\b[,;]?\s*(?i:person\s*string)\b|
|
@@ -53,6 +54,7 @@ module DwcAgent
|
|
53
54
|
\b\s*(?i:prob)\.\s*\b|
|
54
55
|
\(?[,]?\s*?(?i:(local)?\s?collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
|
55
56
|
\b[.-–,;:]?\s*(?i:department|faculty)\s*?(?i:of)?\s*?(?i:entomology|biology|zoology)|
|
57
|
+
(?i:Engº|Agrº|Fcº|Drº|Mº|Profº|Dº|Fº)|
|
56
58
|
(?i:fide)\:?\s*\b|
|
57
59
|
(?i:game\s+dept)\.?\s*\b|
|
58
60
|
(?i:see\s+notes?\s*(inside)?)|
|
@@ -71,7 +73,7 @@ module DwcAgent
|
|
71
73
|
(?i:Field\s+Museum\s+of\s+Natural\s+History)|
|
72
74
|
(?i:American\s+Museum\s+of\s+Natural\s+History)|
|
73
75
|
(?i:The\s+Paleontological\s+Research\s+Institution)|
|
74
|
-
(?i:
|
76
|
+
(?i:museums?\s+victoria)|
|
75
77
|
\b\s*(?i:United\s+States|Russia)\s*\b|
|
76
78
|
(?i:revised|photograph|fruits\s+only)|
|
77
79
|
-?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
|
@@ -79,7 +81,7 @@ module DwcAgent
|
|
79
81
|
\b\s*\(?(?i:(fe)?male)\)?\s*\b|
|
80
82
|
\b(?i:to\s+(sub)?spp?)\.?|
|
81
83
|
(?i:nom\.?\s+rev\.?)|
|
82
|
-
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|
|
84
|
+
FNA|DAO|HUH|FDNMB|MNHN|PNI|USNM|ZMUC|CSIRO|ACAD|USGS|NAWQA|
|
83
85
|
AFSC\/POLISH\s+SORTING\s+CTR\.?|
|
84
86
|
(?i:university|museum|exhibits?)|
|
85
87
|
(?i:uqam)|
|
@@ -107,7 +109,7 @@ module DwcAgent
|
|
107
109
|
}x
|
108
110
|
|
109
111
|
SPLIT_BY = %r{
|
110
|
-
[
|
112
|
+
[–|ǀ∣|│&+\/;:]|
|
111
113
|
\s+-\s+|
|
112
114
|
\s+a\.\s+|
|
113
115
|
\b(e|y|i|en|et|or|per|for)\s*\b|
|
@@ -134,6 +136,9 @@ module DwcAgent
|
|
134
136
|
|
135
137
|
CHAR_SUBS = {
|
136
138
|
'|' => ' | ',
|
139
|
+
'ǀ' => ' | ',
|
140
|
+
'∣' => ' | ',
|
141
|
+
'│' => ' | ',
|
137
142
|
'(' => ' ',
|
138
143
|
')' => ' ',
|
139
144
|
'?' => '',
|
@@ -156,9 +161,13 @@ module DwcAgent
|
|
156
161
|
'dr\.' => 'Dr. ',
|
157
162
|
'mr\.' => 'Mr. ',
|
158
163
|
'mrs\.' => 'Mrs. ',
|
164
|
+
'ms\.' => 'Ms. ',
|
159
165
|
'prof\.' => 'Prof. ',
|
160
166
|
'\, ph\.d\.' => ' Ph.D.',
|
161
|
-
'\, bro\.' => ' Bro.'
|
167
|
+
'\, bro\.' => ' Bro.',
|
168
|
+
' jr\.,' => ' Jr.;',
|
169
|
+
' jr,' => ' Jr.;',
|
170
|
+
'-Jr' => ' Jr.'
|
162
171
|
}
|
163
172
|
|
164
173
|
COMPLEX_SEPARATORS = %r{
|
@@ -174,6 +183,7 @@ module DwcAgent
|
|
174
183
|
(?i:average)|
|
175
184
|
(?i:believe|unclear|ill?egible|none|suggested|(dis)?agrees?)|approach|
|
176
185
|
(?i:barcod)|
|
186
|
+
(?i:BgWd)|
|
177
187
|
(?i:biolog|botan|zoo|ecolog|mycol|(in)?vertebrate|fisheries|genetic|animal|mushroom|wildlife|plumage|flower|agriculture)|
|
178
188
|
(?i:bris?tish|canadi?an?|chinese|arctic|japan|russian|north\s+america)|
|
179
189
|
(?i:carex|salix)|
|
@@ -196,7 +206,7 @@ module DwcAgent
|
|
196
206
|
(?i:geographic)|
|
197
207
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
198
208
|
(?i:univ\.)|
|
199
|
-
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
209
|
+
(?i:graduate|student|élèves?|éleveur|étudiants|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker|gamekeeper)|
|
200
210
|
(?i:non\s+pr(é|e)cis(é|e))|
|
201
211
|
(?i:no\s+consta)|
|
202
212
|
(?i:no\s+(agent)?\s?(data|disponible)(\s+available)?)|
|
@@ -223,7 +233,7 @@ module DwcAgent
|
|
223
233
|
(?i:texas\s+instruments?)\s*?(for)?|
|
224
234
|
(?:tropical)|
|
225
235
|
(?i:toward|seen at)|
|
226
|
-
(?i:unidentified|unspecified|unk?nown
|
236
|
+
(?i:unidentified|unspecified|unk?nown?|unnamed|unread|unmistak|no agent)|
|
227
237
|
(?i:urn\:)|
|
228
238
|
(?i:usda|ucla)|
|
229
239
|
(?i:workshop|garden|farm|jardin|public)|
|
@@ -261,6 +271,6 @@ module DwcAgent
|
|
261
271
|
"has not"
|
262
272
|
]
|
263
273
|
|
264
|
-
TITLE = /\s*\b(sir|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
|
274
|
+
TITLE = /\s*\b(sir|count(ess)?|colonel|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d|rev|mme|abbé|ptre|bro|esq)\.?|docteur|father|cantor|vicar|père|pastor|rabbi|reverend|pere|soeur|sister|professor)(\s+|$)/i
|
265
275
|
|
266
276
|
end
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.6
|
4
|
+
version: 1.4.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|
@@ -98,7 +98,7 @@ files:
|
|
98
98
|
- lib/dwc_agent/similarity.rb
|
99
99
|
- lib/dwc_agent/utility.rb
|
100
100
|
- lib/dwc_agent/version.rb
|
101
|
-
homepage: https://github.com/
|
101
|
+
homepage: https://github.com/bionomia/dwc_agent
|
102
102
|
licenses:
|
103
103
|
- MIT
|
104
104
|
metadata: {}
|