dwc_agent 3.1.2.0 → 3.1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dwc_agent/constants.rb +11 -6
- data/lib/dwc_agent/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e3b1973a169c546b64c5551867b86ff9d48b8306e8dd6829cdf2a28ddef9b36
|
4
|
+
data.tar.gz: 2c9bff52e29d4794f445119a237287c11411416fda4f9f067719e27711f8adb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be2eed1bbe762c84f84783186d632d9e53e30e109d779098cd6e3dcbffb7bf97a2236bd49d3413fcc5fc35195b7e887a5251da2738ddfbd8dd80217834e61313
|
7
|
+
data.tar.gz: 37820d8ab2a25fcbb4dca64180d5876c41c62a8143bddae3a28656e07f4c7ac4365f97d8e8c7faee92dc49892d9d8dbe11c0bbbaf671bbd3a2bab2d08f974c98
|
data/lib/dwc_agent/constants.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
module DwcAgent
|
2
2
|
STRIP_OUT = %r{
|
3
3
|
(?i:acc\s?\#)|
|
4
|
+
["'-]{2,}|
|
5
|
+
\-\.\s|
|
4
6
|
[,;]?\s*(?i:1st|2nd|3rd|[4-9]th)|
|
5
7
|
\s*?\d+\.\d+|
|
6
8
|
\b\d+\(?(?i:[[:alpha:]])\)?\b|
|
7
9
|
\b[,;]?\s*(?:et\.?\s+al|&\s+al)\.?|
|
8
10
|
\b[,;]?\s*(?i:etal)\.?|
|
11
|
+
\b[,;]?\s*(?i:et.al)\.?|
|
9
12
|
\b\s+(bis|ter)(\b|\z)|
|
10
13
|
\bu\.\s*a\.|
|
11
14
|
\b[,;]?\s*(?i:and|&)?\s*(?i:others|party)\s*\b|
|
@@ -25,6 +28,7 @@ module DwcAgent
|
|
25
28
|
^(?i:collection)\:?\s+|\s*(?i:collection)\s*$|
|
26
29
|
\b[,;]?\s*(?i:colls)\.(\b|\z)|
|
27
30
|
(?i:contactid)|
|
31
|
+
^(?i:dupl)[.,]{1,}|
|
28
32
|
\b[,;]?\s*(?i:stet)[,!]?\s*\d*\z|
|
29
33
|
[,;]?\s*\d+[-/\s+](?i:\d+|Jan|Feb|Mar|Apr|
|
30
34
|
May|Jun|Jul|Aug|Sept?|
|
@@ -103,10 +107,10 @@ module DwcAgent
|
|
103
107
|
(?i:no\s+coll\.?(ector)?)|
|
104
108
|
(?i:not?)\s+(?i:name|date|details?|specific)?\s*?(?i:given|name|date|noted)|
|
105
109
|
(?i:non?)\s+(?i:specificato)|
|
106
|
-
\b[,;]\s+\d
|
110
|
+
\b[,;]\s+\d+\.?\z|
|
107
111
|
[!@?]|
|
112
|
+
\d{1,4}[\/.]?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x|xi|xii)[\/.]\d{1,4}|
|
108
113
|
[,]?\d+|
|
109
|
-
\s+\d+?(\/|\.)?(?i:i|ii|iii|iv|v|vi|vii|viii|ix|x)(\/|\.)\d+|
|
110
114
|
[,;]\z|
|
111
115
|
^\w{0,2}\z|
|
112
116
|
^[A-Z]{2,}\z|
|
@@ -116,7 +120,7 @@ module DwcAgent
|
|
116
120
|
([({].*?[)}])|
|
117
121
|
\s+\[([[:word:]]|[[:space:]]|[-\?\.]){10,}\]|
|
118
122
|
[\(\{][A-Za-z]{1,3}$|
|
119
|
-
\b(?i:leg)[
|
123
|
+
\b(?i:leg)[.:]?(\s|\z)|
|
120
124
|
(?:[Dd](ed|on))[\.:]|
|
121
125
|
\d*[A-Za-z]*\d*-\d*\z|
|
122
126
|
\s+[A-Z]*\d+\z|
|
@@ -133,6 +137,7 @@ module DwcAgent
|
|
133
137
|
}x
|
134
138
|
|
135
139
|
SPLIT_BY = %r{
|
140
|
+
[;,]{2,}|
|
136
141
|
[–|ǀ∣|│&+\/;:]|
|
137
142
|
\s+-\s+|
|
138
143
|
\s+a\.\s+|
|
@@ -147,7 +152,7 @@ module DwcAgent
|
|
147
152
|
\b(?i:checked?(\s+by)?)\s*\b|
|
148
153
|
\b(?i:det\.?(\s+by)?)\s*\b|
|
149
154
|
\b(?i:(donated)?\s*by)\s+|
|
150
|
-
\b(?i:dupl
|
155
|
+
\b(?i:dupl?[.,]?(\s+by)?|duplicate(\s+by)?)\s*\b|
|
151
156
|
\b(?i:ex\.?(\s+by)?|examined(\s+by)?)\s*\b|
|
152
157
|
\b(?i:in?dentified(\s+by)?)\s*\b|
|
153
158
|
\b(?i:in\s+coll\.?\s*\b)|
|
@@ -166,8 +171,8 @@ module DwcAgent
|
|
166
171
|
POST_STRIP_TIDY = %r{
|
167
172
|
^\s*[&,;.]\s*|
|
168
173
|
[\[\]]|
|
169
|
-
^[`'"
|
170
|
-
[`'"]$
|
174
|
+
^[`'".,!?]{1,}|
|
175
|
+
[`'",]{1,}$
|
171
176
|
}x
|
172
177
|
|
173
178
|
CHAR_SUBS = {
|
data/lib/dwc_agent/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.2.0
|
4
|
+
version: 3.1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David P. Shorthouse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: namae
|