dwc_agent 0.1.16 → 0.1.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/cleaner.rb +4 -0
- data/lib/dwc_agent/constants.rb +14 -7
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3cec6480c674b8445ed7c7faa22cb9635607ac56
|
4
|
+
data.tar.gz: 6ef18ecc234b816bf0127796ef31e63c4ea8e3be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6bd424b6a11aa2849424bd1cc8805d1b43505fd296bcab318980d0e76efecf08d12c1ef092baee86d4296c863dd572a169a0332589ec1326e26434b2e801b7c
|
7
|
+
data.tar.gz: a7e7a047ef29b39a54a1be20e39d92c3d4216de2e0cb4a7d4f7d8bfa58f8c7bac39dee64033efa8a2031707c63ee2d910f512d190faf734e3f8588ee0f0f55bf
|
data/lib/dwc_agent/cleaner.rb
CHANGED
@@ -52,6 +52,10 @@ module DwcAgent
|
|
52
52
|
parsed_namae.given = CapitalizeNames.capitalize(parsed_namae.given)
|
53
53
|
end
|
54
54
|
|
55
|
+
if parsed_namae.given && /\.[A-Z]$/.match(parsed_namae.given)
|
56
|
+
parsed_namae.given += "."
|
57
|
+
end
|
58
|
+
|
55
59
|
if parsed_namae.given && /[A-Za-z]\./.match(parsed_namae.given)
|
56
60
|
parsed_namae.given = CapitalizeNames.capitalize(parsed_namae.given).gsub(/[a-z]\./, &:upcase)
|
57
61
|
end
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -11,10 +11,12 @@ module DwcAgent
|
|
11
11
|
\b[,;]?\s*(?i:n/a)\b|
|
12
12
|
\b[,;]?\s*(?i:ann?onymous)\b|
|
13
13
|
\b[,;]?\s*(?i:undetermined|indeterminable|dummy|interim|accession)\b|
|
14
|
-
\b[,;]?\s*(?i:importer)
|
14
|
+
\b[,;]?\s*(?i:importer|gift)\:?\b|
|
15
15
|
\b[,;]?\s*(?i:frère|frere|père|pere|soeur|sister|bro)\.?(\b|\z)|
|
16
16
|
\b[,;]?\s*(?i:string)\b|
|
17
17
|
\b[,;]?\s*(?i:person\s*string)\b|
|
18
|
+
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
19
|
+
\b[,;]?\s*(?i:colln?)[:.]?(\b|\z)|
|
18
20
|
(?i:no\s+(data|disponible))|
|
19
21
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
20
22
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
@@ -45,7 +47,8 @@ module DwcAgent
|
|
45
47
|
\d+\s+(?i:Nov|Novemb(er|re))\.?\b|
|
46
48
|
\d+\s+(?i:Dec|D(e|é)cemb(er|re))\.?\b|
|
47
49
|
(?i:autres?\s+de|probab|likely|possibl(e|y)|doubtful)|
|
48
|
-
\b\s*maybe\s*\b|
|
50
|
+
\b\s*(?i:maybe)\s*\b|
|
51
|
+
\b\s*(?i:prob)\.\s*\b|
|
49
52
|
\(?(?i:collector|data\s*recorder|netter|(oper|prepar)ator)\(?s?\)?\.?\:?|
|
50
53
|
(?i:fide)\:?\s*\b|
|
51
54
|
(?i:game\s+dept)\.?\s*\b|
|
@@ -82,14 +85,15 @@ module DwcAgent
|
|
82
85
|
(?i:ded)\:|
|
83
86
|
^[-,.\s;*\d]+\s?|
|
84
87
|
-\d?\z|
|
85
|
-
\s*?-{2,}\s
|
88
|
+
\s*?-{2,}\s*?|
|
89
|
+
^(?i:exc?p?)[:.]\s*
|
86
90
|
}x
|
87
91
|
|
88
92
|
SPLIT_BY = %r{
|
89
93
|
[–|&+/;]|
|
90
94
|
\s+-\s+|
|
91
95
|
\s+a\.\s+|
|
92
|
-
\b(e|y|en|et|or|per|for)\s*\b|
|
96
|
+
\b(e|y|i|en|et|or|per|for)\s*\b|
|
93
97
|
\b(?i:and|with)\s*\b|
|
94
98
|
\b(?i:annotated(\s+by)?)\s*\b|
|
95
99
|
\b(?i:coll\.)\s*\b|
|
@@ -156,13 +160,16 @@ module DwcAgent
|
|
156
160
|
\b\s*(?i:help)\s*\b|
|
157
161
|
(?i:description|drawing|identification|remark|original|illustration|checklist|intermedia|measurement|indisting|series|imperfect)|
|
158
162
|
(?i:desconocido)|
|
163
|
+
(?i:exc?s?icc?at(a|i))|
|
159
164
|
(?i:evidence)|
|
165
|
+
(?i:exporter)|
|
160
166
|
(?i:inconn?u)|
|
161
|
-
(?i:internation|gou?vern|ministry|unit|district|provincial|na(c|t)ional|military|region|environ|natur(e|al)|naturelles|division|program|direction|national)|
|
167
|
+
(?i:internation|gou?vern|ministry|extension|unit|district|provincial|na(c|t)ional|military|region|environ|natur(e|al)|naturelles|division|program|direction|national)|
|
162
168
|
(?i:label)|
|
163
169
|
(?i:o?\.?m\.?n\.?r\.?)|
|
164
170
|
(?i:measurement)|
|
165
171
|
(?i:ent(o|y)mology)|
|
172
|
+
(?i:geographic)|
|
166
173
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
167
174
|
(?i:univ\.)|
|
168
175
|
(?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
@@ -170,11 +177,11 @@ module DwcAgent
|
|
170
177
|
(?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
|
171
178
|
(?i:recreation|culture)|
|
172
179
|
(?i:shaped|dark|pale|areas|phase|spotting|interior|between|closer)|
|
173
|
-
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|class|commission|consortium|council|club|alliance|protective|circle)|
|
180
|
+
(?i:soci(e|é)t(y|é)|cent(er|re)|community|history|conservation|conference|assoc|class|commission|consortium|council|club|exposit|alliance|protective|circle)|
|
174
181
|
(?i:commercial|company|control|product)|
|
175
182
|
(?i:size|large|colou?r)\s+|
|
176
183
|
(?i:skeleton)|
|
177
|
-
(?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|
|
184
|
+
(?i:survey|assessment|station|monitor|stn\.|index|project|bureau|engine|exchange|ex(c|k)ursi(e|o|ó)n?|exped\.?|exp(e|i)di(c|t)i(e|o|ó)n?|experiment|explora(d|t)|festival|generation|inventory|marine|service)|
|
178
185
|
(?i:submersible)|
|
179
186
|
(?i:synonymy?)|(topo|syn|holo)type|
|
180
187
|
(?i:systematic|perspective)|
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.16
|
4
|
+
version: 0.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|