dwc_agent 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +14 -7
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e5662fb6ce9768a7e4842123e7d6bd1f3609383
|
4
|
+
data.tar.gz: bf361f626333393dc5f68a960d6b9f2ceb3b634d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57cb357cf32164f6c0a50b604e1995626404565888a958842bcc50d32d5a6a9fd24f59d36768e1d5eb18eb8a03b1f9c301df66a174bc1fc93a10e5ae9715bc99
|
7
|
+
data.tar.gz: 2719e50b188b8babf83afccf4f3dfabddc5f7ba3a17716df4a2b2b7e9cf33d72d6e9da1dacfe3e8c818b49e5a3261a53a52875b25c24d6087c7da8e9f9f07cff
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -53,6 +53,7 @@ module DwcAgent
|
|
53
53
|
(?i:crossed\s+out)|
|
54
54
|
\(?(?i:source)\(?|
|
55
55
|
(?i:according\s+to)|
|
56
|
+
(?i:museum\s+victoria)|
|
56
57
|
(?i:revised|photograph|fruits\s+only)|
|
57
58
|
-?\s*(?i:sight\s+(id|identifi?cation))\.?\s*\b|
|
58
59
|
-?\s*(?i:synonym(y|ie))|
|
@@ -60,6 +61,7 @@ module DwcAgent
|
|
60
61
|
\b(?i:to\s+(sub)?spp?)\.?|
|
61
62
|
(?i:nom\.?\s+rev\.?)|
|
62
63
|
FNA|DAO|HUH|FDNMB|\(MT\)|(?i:\(KEW\))|
|
64
|
+
(?i:university|museum|exhibits?)|
|
63
65
|
(?i:uqam)|
|
64
66
|
\b[,;]\s+\d+\z|
|
65
67
|
[":!]|
|
@@ -68,18 +70,19 @@ module DwcAgent
|
|
68
70
|
[,;]\z|
|
69
71
|
^\w{0,2}\z|
|
70
72
|
^[A-Z]{2,}\z|
|
73
|
+
(?i:annot\.?)\b|
|
71
74
|
\s+(?i:stet)\s*!?\s*\z|
|
72
75
|
\s+(?i:prep)\.?\s*\z|
|
73
|
-
\b\s*\([A-Z]{2,}\)
|
76
|
+
\b\s*\([A-Z]{2,}\)|
|
77
|
+
\b[lL]eg[\.:]\s*\b
|
74
78
|
}x
|
75
79
|
|
76
80
|
SPLIT_BY = %r{
|
77
81
|
[–|&+/;]|
|
78
82
|
\s+-\s+|
|
79
83
|
\s+a\.\s+|
|
80
|
-
\b(
|
81
|
-
\
|
82
|
-
\be\s*\b|
|
84
|
+
\b(e|y|en|et|or|per|for)\s*\b|
|
85
|
+
\b(?i:and|with)\s*\b|
|
83
86
|
\b(?i:annotated(\s+by)?)\s*\b|
|
84
87
|
\b(?i:coll\.)\s*\b|
|
85
88
|
\b(?i:communicate?d(\s+to)?)\s*\b|
|
@@ -90,7 +93,6 @@ module DwcAgent
|
|
90
93
|
\b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
|
91
94
|
\b(?i:in?dentified(\s+by)?)\s*\b|
|
92
95
|
\b(?i:in\s+part(\s+by)?)\s*\b|
|
93
|
-
\b(?i:or)\s+|
|
94
96
|
\b(?i:prep\.?\s+(?i:by)?)\s*\b|
|
95
97
|
\b(?i:redet\.?(\s+by?)?)\s*\b|
|
96
98
|
\b(?i:reidentified(\s+by)?)\s*\b|
|
@@ -113,7 +115,11 @@ module DwcAgent
|
|
113
115
|
'#' => '',
|
114
116
|
'/' => ' / ',
|
115
117
|
'&' => ' & ',
|
116
|
-
'*' => ''
|
118
|
+
'*' => '',
|
119
|
+
'>' => '',
|
120
|
+
'<' => '',
|
121
|
+
'{' => '',
|
122
|
+
'}' => ''
|
117
123
|
}
|
118
124
|
|
119
125
|
COMPLEX_SEPARATORS = %r{
|
@@ -143,7 +149,7 @@ module DwcAgent
|
|
143
149
|
(?i:ent(o|y)mology)|
|
144
150
|
(?i:mus(eum|ée)|universit(y|é|e|at)|college|institute?|acad(e|é)m|school|écol(e|iers?)|laboratoi?r|projec?t|polytech|dep(t|art?ment)|research|clinic|hospital|cientifica|sanctuary|safari)|
|
145
151
|
(?i:univ\.)|
|
146
|
-
(?i:graduate|student|
|
152
|
+
(?i:graduate|student|estudi?antes?|labo\.|storekeep|supervisor|superint|rcmp|coordinator|minority|fishermen|police|taxonomist|consultant|participante?s?|team|(é|e)quipe|memb(er|re)|crew|group|staff|personnel|family|captain|friends|assistant|worker)|
|
147
153
|
(?i:non\s+pr(é|e)cis(é|e))|
|
148
154
|
(?i:ontario|qu(e|é)bec|saskatchewan|new brunswick|sault|newfoundland|assurance|vancouver|u\.?s\.?s\.?r\.?)|
|
149
155
|
(?i:recreation|culture)|
|
@@ -160,6 +166,7 @@ module DwcAgent
|
|
160
166
|
\s*(?i:too)\s+|\s*(?i:the)\s+|
|
161
167
|
(?i:taxiderm(ies|y))|
|
162
168
|
(?i:though)|
|
169
|
+
(?:tropical)|
|
163
170
|
(?i:toward|seen at)|
|
164
171
|
(?i:unidentified|unspecified|unk?nown|unnamed|unread|unmistak|no agent)|
|
165
172
|
(?i:urn\:)|
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|