biodiversity 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/README.md +9 -0
- data/lib/biodiversity/parser.rb +18 -14
- data/lib/biodiversity/version.rb +1 -1
- data/spec/parser/scientific_name_spec.rb +76 -35
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd706ba900f6f136fae256b85f55223e955b4300
|
|
4
|
+
data.tar.gz: 00b5ee4e9b8b34d7fcb5084ae4b82e6db8635522
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f34fb2b4e2d7e97f748967f9cec04625416b33789287ac6de7d63de348b676d4504b4c8a631505d7074989aff2d0597b857b0843b3f45cb4c5c3f5d61e7336ca
|
|
7
|
+
data.tar.gz: 8886e2fccfb35456c326e461c7f48d42ef3c5f76d2044219b8e63af2c690851f91cc4c536b2727ffe0684fdf0c006425322d6d7664b7dc7fe4717e02ed17031d
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
|
@@ -128,6 +128,15 @@ parser.parse(" Plantago major ")[:scientificName][:normalized]
|
|
|
128
128
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. \
|
|
129
129
|
Braun & Crous 2003")[:scientificName][:canonical]
|
|
130
130
|
|
|
131
|
+
# to get canonical form with infraspecies ranks
|
|
132
|
+
parsed = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
|
|
133
|
+
ranked = ScientificNameParser.add_rank_to_canonical(parsed)
|
|
134
|
+
ranked[:scientificName][:canonical]
|
|
135
|
+
#or
|
|
136
|
+
parser = ScientificNameParser.new(canonical_with_rank: true)
|
|
137
|
+
ranked = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
|
|
138
|
+
ranked[:scientificName][:canonical]
|
|
139
|
+
|
|
131
140
|
# to get detailed information about elements of the name
|
|
132
141
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
|
133
142
|
Braun & Crous 2003")[:scientificName][:details]
|
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -115,6 +115,20 @@ class ScientificNameParser
|
|
|
115
115
|
}
|
|
116
116
|
end
|
|
117
117
|
|
|
118
|
+
def self.add_rank_to_canonical(parsed)
|
|
119
|
+
return parsed if parsed[:scientificName][:hybrid]
|
|
120
|
+
name = parsed[:scientificName]
|
|
121
|
+
parts = name[:canonical].split(" ")
|
|
122
|
+
name_ary = parts[0..1]
|
|
123
|
+
name[:details][0][:infraspecies].each do |data|
|
|
124
|
+
infrasp = data[:string]
|
|
125
|
+
rank = data[:rank]
|
|
126
|
+
name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
|
|
127
|
+
end
|
|
128
|
+
parsed[:scientificName][:canonical] = name_ary.join(" ")
|
|
129
|
+
parsed
|
|
130
|
+
end
|
|
131
|
+
|
|
118
132
|
def self.version
|
|
119
133
|
Biodiversity::VERSION
|
|
120
134
|
end
|
|
@@ -230,13 +244,14 @@ class ScientificNameParser
|
|
|
230
244
|
else
|
|
231
245
|
res.merge!(self)
|
|
232
246
|
end
|
|
247
|
+
res[:surrogate] = true if ScientificNameParser.surrogate?(res)
|
|
248
|
+
res = {:scientificName => res}
|
|
233
249
|
if (canonical_with_rank &&
|
|
234
250
|
canonical.count(" ") > 1 &&
|
|
235
|
-
res[:details][0][:infraspecies])
|
|
251
|
+
res[:scientificName][:details][0][:infraspecies])
|
|
236
252
|
ScientificNameParser.add_rank_to_canonical(res)
|
|
237
253
|
end
|
|
238
|
-
res
|
|
239
|
-
res = {:scientificName => res}
|
|
254
|
+
res
|
|
240
255
|
end
|
|
241
256
|
|
|
242
257
|
def @parsed.pos_json
|
|
@@ -270,15 +285,4 @@ class ScientificNameParser
|
|
|
270
285
|
name.match(surrogate2))
|
|
271
286
|
is_surrogate
|
|
272
287
|
end
|
|
273
|
-
|
|
274
|
-
def self.add_rank_to_canonical(parsed)
|
|
275
|
-
parts = parsed[:canonical].split(" ")
|
|
276
|
-
name_ary = parts[0..1]
|
|
277
|
-
parsed[:details][0][:infraspecies].each do |data|
|
|
278
|
-
infrasp = data[:string]
|
|
279
|
-
rank = data[:rank]
|
|
280
|
-
name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
|
|
281
|
-
end
|
|
282
|
-
parsed[:canonical] = name_ary.join(" ")
|
|
283
|
-
end
|
|
284
288
|
end
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/parser/scientific_name_spec.rb
CHANGED
|
@@ -7,23 +7,23 @@ describe ScientificNameParser do
|
|
|
7
7
|
set_parser(ScientificNameParser.new)
|
|
8
8
|
end
|
|
9
9
|
|
|
10
|
-
it
|
|
10
|
+
it "returns version number" do
|
|
11
11
|
expect(ScientificNameParser.version).to match /^\d+\.\d+\.\d+/
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
it
|
|
14
|
+
it "fixes cases" do
|
|
15
15
|
names = [
|
|
16
|
-
[
|
|
17
|
-
[
|
|
18
|
-
[
|
|
19
|
-
[
|
|
16
|
+
["QUERCUS ALBA", "Quercus alba"],
|
|
17
|
+
["QUERCUS (QUERCUS) ALBA", "Quercus (Quercus) alba"],
|
|
18
|
+
["QÜERCUS", "Qüercus"],
|
|
19
|
+
["PARDOSA MOéSTA", "Pardosa moésta"],
|
|
20
20
|
]
|
|
21
21
|
names.each do |name, capitalization|
|
|
22
22
|
expect(ScientificNameParser::fix_case(name)).to eq capitalization
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
it
|
|
26
|
+
it "generates standardized json" do
|
|
27
27
|
read_test_file do |y|
|
|
28
28
|
expect(JSON.load(json(y[:name]))).to eq JSON.
|
|
29
29
|
load(y[:jsn]) unless y[:comment]
|
|
@@ -31,9 +31,9 @@ describe ScientificNameParser do
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
# it
|
|
34
|
+
# it "generates new test_file" do
|
|
35
35
|
# new_test = open(File.expand_path(dir +
|
|
36
|
-
#
|
|
36
|
+
# "../../spec/parser/test_data_new.txt"),"w")
|
|
37
37
|
# read_test_file do |y|
|
|
38
38
|
# if y[:comment]
|
|
39
39
|
# new_test.write y[:comment]
|
|
@@ -45,39 +45,39 @@ describe ScientificNameParser do
|
|
|
45
45
|
# end
|
|
46
46
|
# end
|
|
47
47
|
|
|
48
|
-
it
|
|
49
|
-
sn =
|
|
48
|
+
it "generates reasonable output if parser failed" do
|
|
49
|
+
sn = "ddd sljlkj 3223452432"
|
|
50
50
|
expect(json(sn)).to eq "{\"scientificName\":" \
|
|
51
51
|
"{\"id\":\"3ebf93d9-b62a-5198-8715-4c8302f0a5d7\",\"parsed\":false," \
|
|
52
52
|
"\"parser_version\":\"test_version\"," \
|
|
53
53
|
"\"verbatim\":\"ddd sljlkj 3223452432\"}}"
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
-
it
|
|
57
|
-
expect(parse(
|
|
56
|
+
it "shows version when the flag :show_version set to true" do
|
|
57
|
+
expect(parse("Homo sapiens")[:scientificName][:parser_version]).
|
|
58
58
|
to_not be_nil
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
it
|
|
62
|
-
expect(parse(
|
|
61
|
+
it "shows version for not spelled names" do
|
|
62
|
+
expect(parse("not_a_name")[:scientificName][:parser_version]).to_not be_nil
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
-
it
|
|
66
|
-
expect(parse(
|
|
65
|
+
it "generates version for viruses" do
|
|
66
|
+
expect(parse("Nile virus")[:scientificName][:parser_version]).to_not be_nil
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
-
describe 'ScientificNameParser with ranked canonicals' do
|
|
70
|
+
describe "ScientificNameParser with ranked canonicals" do
|
|
71
71
|
before(:all) do
|
|
72
72
|
@parser = ScientificNameParser.new(canonical_with_rank: true)
|
|
73
73
|
end
|
|
74
74
|
|
|
75
|
-
it
|
|
75
|
+
it "does not influence output for uninomials and binomials" do
|
|
76
76
|
data = [
|
|
77
|
-
[
|
|
78
|
-
[
|
|
79
|
-
|
|
80
|
-
[
|
|
77
|
+
["Ekbainacanthus Yakowlew 1902","Ekbainacanthus"],
|
|
78
|
+
["Ekboarmia sagnesi herrerai Exposito 2007",
|
|
79
|
+
"Ekboarmia sagnesi herrerai"],
|
|
80
|
+
["Ekboarmia holli Oberthür", "Ekboarmia holli"]]
|
|
81
81
|
|
|
82
82
|
data.each do |d|
|
|
83
83
|
parsed = @parser.parse(d[0])[:scientificName][:canonical]
|
|
@@ -85,14 +85,14 @@ describe 'ScientificNameParser with ranked canonicals' do
|
|
|
85
85
|
end
|
|
86
86
|
end
|
|
87
87
|
|
|
88
|
-
it
|
|
88
|
+
it "preserves rank for ranked multinomials" do
|
|
89
89
|
data = [
|
|
90
|
-
[
|
|
91
|
-
|
|
92
|
-
[
|
|
93
|
-
|
|
94
|
-
[
|
|
95
|
-
|
|
90
|
+
["Cola cordifolia var. puberula A. Chev.",
|
|
91
|
+
"Cola cordifolia var. puberula"],
|
|
92
|
+
["Abies homolepis forma umbilicata (Mayr) Schelle",
|
|
93
|
+
"Abies homolepis forma umbilicata"],
|
|
94
|
+
["Quercus ilex ssp. ballota (Desf.) Samp",
|
|
95
|
+
"Quercus ilex ssp. ballota"],
|
|
96
96
|
["Physarum globuliferum forma. flavum Leontyev & Dudka",
|
|
97
97
|
"Physarum globuliferum forma. flavum"]
|
|
98
98
|
]
|
|
@@ -101,16 +101,57 @@ describe 'ScientificNameParser with ranked canonicals' do
|
|
|
101
101
|
expect(parsed).to eq d[1]
|
|
102
102
|
end
|
|
103
103
|
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
describe ".add_rank_to_canonical" do
|
|
107
|
+
subject(:parser) { ScientificNameParser.new }
|
|
104
108
|
|
|
109
|
+
it "adds rank to infraspecies with rank" do
|
|
110
|
+
data = [
|
|
111
|
+
["Cola cordifolia var. puberula A. Chev.",
|
|
112
|
+
"Cola cordifolia puberula",
|
|
113
|
+
"Cola cordifolia var. puberula"],
|
|
114
|
+
["Abies homolepis forma umbilicata (Mayr) Schelle",
|
|
115
|
+
"Abies homolepis umbilicata",
|
|
116
|
+
"Abies homolepis forma umbilicata"],
|
|
117
|
+
["Quercus ilex ssp. ballota (Desf.) Samp",
|
|
118
|
+
"Quercus ilex ballota",
|
|
119
|
+
"Quercus ilex ssp. ballota"],
|
|
120
|
+
["Physarum globuliferum forma. flavum Leontyev & Dudka",
|
|
121
|
+
"Physarum globuliferum flavum",
|
|
122
|
+
"Physarum globuliferum forma. flavum"]
|
|
123
|
+
]
|
|
124
|
+
data.each do |d|
|
|
125
|
+
parsed = parser.parse(d[0])
|
|
126
|
+
canonical1 = parsed[:scientificName][:canonical]
|
|
127
|
+
expect(canonical1).to eq d[1]
|
|
128
|
+
ScientificNameParser.add_rank_to_canonical(parsed)
|
|
129
|
+
canonical2 = parsed[:scientificName][:canonical]
|
|
130
|
+
expect(canonical2).to eq d[2]
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it "does not work for hybrids yet" do
|
|
135
|
+
data = [["Corda X cordiflora var. puberula",
|
|
136
|
+
"Corda cordiflora puberula"]]
|
|
137
|
+
data.each do |d|
|
|
138
|
+
parsed = parser.parse(d[0])
|
|
139
|
+
canonical1 = parsed[:scientificName][:canonical]
|
|
140
|
+
expect(canonical1).to eq d[1]
|
|
141
|
+
ScientificNameParser.add_rank_to_canonical(parsed)
|
|
142
|
+
canonical2 = parsed[:scientificName][:canonical]
|
|
143
|
+
expect(canonical2).to eq d[1]
|
|
144
|
+
end
|
|
145
|
+
end
|
|
105
146
|
end
|
|
106
147
|
|
|
107
148
|
describe ParallelParser do
|
|
108
|
-
it
|
|
149
|
+
it "finds number of cpus" do
|
|
109
150
|
pparser = ParallelParser.new
|
|
110
151
|
expect(pparser.cpu_num).to be > 0
|
|
111
152
|
end
|
|
112
153
|
|
|
113
|
-
it
|
|
154
|
+
it "parses several names in parallel" do
|
|
114
155
|
names = []
|
|
115
156
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
116
157
|
names.uniq!
|
|
@@ -120,7 +161,7 @@ describe ParallelParser do
|
|
|
120
161
|
expect(res.keys.size).to eq names.size
|
|
121
162
|
end
|
|
122
163
|
|
|
123
|
-
it
|
|
164
|
+
it "parses several names in parallel with given num of processes" do
|
|
124
165
|
names = []
|
|
125
166
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
126
167
|
names.uniq!
|
|
@@ -130,8 +171,8 @@ describe ParallelParser do
|
|
|
130
171
|
expect(res.keys.size).to eq names.size
|
|
131
172
|
end
|
|
132
173
|
|
|
133
|
-
it
|
|
134
|
-
a hash with name as a key and parsed data as value
|
|
174
|
+
it "has parsed name in native ruby format and in returned as \
|
|
175
|
+
a hash with name as a key and parsed data as value" do
|
|
135
176
|
names = []
|
|
136
177
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
137
178
|
names.uniq!
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.3.0
|
|
4
|
+
version: 3.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Mozzherin
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-08-
|
|
11
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: treetop
|