biodiversity 3.3.0 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/README.md +9 -0
- data/lib/biodiversity/parser.rb +18 -14
- data/lib/biodiversity/version.rb +1 -1
- data/spec/parser/scientific_name_spec.rb +76 -35
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd706ba900f6f136fae256b85f55223e955b4300
|
4
|
+
data.tar.gz: 00b5ee4e9b8b34d7fcb5084ae4b82e6db8635522
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f34fb2b4e2d7e97f748967f9cec04625416b33789287ac6de7d63de348b676d4504b4c8a631505d7074989aff2d0597b857b0843b3f45cb4c5c3f5d61e7336ca
|
7
|
+
data.tar.gz: 8886e2fccfb35456c326e461c7f48d42ef3c5f76d2044219b8e63af2c690851f91cc4c536b2727ffe0684fdf0c006425322d6d7664b7dc7fe4717e02ed17031d
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -128,6 +128,15 @@ parser.parse(" Plantago major ")[:scientificName][:normalized]
|
|
128
128
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. \
|
129
129
|
Braun & Crous 2003")[:scientificName][:canonical]
|
130
130
|
|
131
|
+
# to get canonical form with infraspecies ranks
|
132
|
+
parsed = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
|
133
|
+
ranked = ScientificNameParser.add_rank_to_canonical(parsed)
|
134
|
+
ranked[:scientificName][:canonical]
|
135
|
+
# or let the parser add ranks to the canonical form itself
|
136
|
+
parser = ScientificNameParser.new(canonical_with_rank: true)
|
137
|
+
ranked = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
|
138
|
+
ranked[:scientificName][:canonical]
|
139
|
+
|
131
140
|
# to get detailed information about elements of the name
|
132
141
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
133
142
|
Braun & Crous 2003")[:scientificName][:details]
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -115,6 +115,20 @@ class ScientificNameParser
|
|
115
115
|
}
|
116
116
|
end
|
117
117
|
|
118
|
+
# Rewrites the canonical form of an already parsed name so that every
# infraspecific epithet is preceded by its rank, e.g.
# "Cola cordifolia puberula" becomes "Cola cordifolia var. puberula".
#
# parsed - Hash shaped like the parser output: the name lives under
#          :scientificName, with :canonical (String), :hybrid and
#          :details (Array whose first element may hold :infraspecies,
#          a list of hashes with :string and :rank).
#
# The hash is modified in place and also returned. It is returned untouched
# when the name is a hybrid (not supported yet), when there is no canonical
# form (e.g. the name could not be parsed), or when the name carries no
# infraspecies details (uninomials and binomials) -- previously the last two
# cases raised NoMethodError on nil.
def self.add_rank_to_canonical(parsed)
  name = parsed[:scientificName]
  return parsed unless name
  return parsed if name[:hybrid]

  canonical = name[:canonical]
  first_detail = (name[:details] || []).first
  infraspecies = first_detail && first_detail[:infraspecies]
  return parsed unless canonical && infraspecies

  # Keep the first two words of the canonical form and rebuild everything
  # after them from the detailed infraspecies records.
  ranked = canonical.split(" ").first(2)
  infraspecies.each do |data|
    epithet = data[:string]
    rank = data[:rank]
    # "n/a" is the placeholder used for epithets without an explicit rank.
    ranked << (rank && rank != "n/a" ? "#{rank} #{epithet}" : epithet)
  end
  name[:canonical] = ranked.join(" ")
  parsed
end
|
131
|
+
|
118
132
|
def self.version
|
119
133
|
Biodiversity::VERSION
|
120
134
|
end
|
@@ -230,13 +244,14 @@ class ScientificNameParser
|
|
230
244
|
else
|
231
245
|
res.merge!(self)
|
232
246
|
end
|
247
|
+
res[:surrogate] = true if ScientificNameParser.surrogate?(res)
|
248
|
+
res = {:scientificName => res}
|
233
249
|
if (canonical_with_rank &&
|
234
250
|
canonical.count(" ") > 1 &&
|
235
|
-
res[:details][0][:infraspecies])
|
251
|
+
res[:scientificName][:details][0][:infraspecies])
|
236
252
|
ScientificNameParser.add_rank_to_canonical(res)
|
237
253
|
end
|
238
|
-
res
|
239
|
-
res = {:scientificName => res}
|
254
|
+
res
|
240
255
|
end
|
241
256
|
|
242
257
|
def @parsed.pos_json
|
@@ -270,15 +285,4 @@ class ScientificNameParser
|
|
270
285
|
name.match(surrogate2))
|
271
286
|
is_surrogate
|
272
287
|
end
|
273
|
-
|
274
|
-
def self.add_rank_to_canonical(parsed)
|
275
|
-
parts = parsed[:canonical].split(" ")
|
276
|
-
name_ary = parts[0..1]
|
277
|
-
parsed[:details][0][:infraspecies].each do |data|
|
278
|
-
infrasp = data[:string]
|
279
|
-
rank = data[:rank]
|
280
|
-
name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
|
281
|
-
end
|
282
|
-
parsed[:canonical] = name_ary.join(" ")
|
283
|
-
end
|
284
288
|
end
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/parser/scientific_name_spec.rb
CHANGED
@@ -7,23 +7,23 @@ describe ScientificNameParser do
|
|
7
7
|
set_parser(ScientificNameParser.new)
|
8
8
|
end
|
9
9
|
|
10
|
-
it
|
10
|
+
it "returns version number" do
|
11
11
|
expect(ScientificNameParser.version).to match /^\d+\.\d+\.\d+/
|
12
12
|
end
|
13
13
|
|
14
|
-
it
|
14
|
+
it "fixes cases" do
|
15
15
|
names = [
|
16
|
-
[
|
17
|
-
[
|
18
|
-
[
|
19
|
-
[
|
16
|
+
["QUERCUS ALBA", "Quercus alba"],
|
17
|
+
["QUERCUS (QUERCUS) ALBA", "Quercus (Quercus) alba"],
|
18
|
+
["QÜERCUS", "Qüercus"],
|
19
|
+
["PARDOSA MOéSTA", "Pardosa moésta"],
|
20
20
|
]
|
21
21
|
names.each do |name, capitalization|
|
22
22
|
expect(ScientificNameParser::fix_case(name)).to eq capitalization
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
it
|
26
|
+
it "generates standardized json" do
|
27
27
|
read_test_file do |y|
|
28
28
|
expect(JSON.load(json(y[:name]))).to eq JSON.
|
29
29
|
load(y[:jsn]) unless y[:comment]
|
@@ -31,9 +31,9 @@ describe ScientificNameParser do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
|
34
|
-
# it
|
34
|
+
# it "generates new test_file" do
|
35
35
|
# new_test = open(File.expand_path(dir +
|
36
|
-
#
|
36
|
+
# "../../spec/parser/test_data_new.txt"),"w")
|
37
37
|
# read_test_file do |y|
|
38
38
|
# if y[:comment]
|
39
39
|
# new_test.write y[:comment]
|
@@ -45,39 +45,39 @@ describe ScientificNameParser do
|
|
45
45
|
# end
|
46
46
|
# end
|
47
47
|
|
48
|
-
it
|
49
|
-
sn =
|
48
|
+
it "generates reasonable output if parser failed" do
|
49
|
+
sn = "ddd sljlkj 3223452432"
|
50
50
|
expect(json(sn)).to eq "{\"scientificName\":" \
|
51
51
|
"{\"id\":\"3ebf93d9-b62a-5198-8715-4c8302f0a5d7\",\"parsed\":false," \
|
52
52
|
"\"parser_version\":\"test_version\"," \
|
53
53
|
"\"verbatim\":\"ddd sljlkj 3223452432\"}}"
|
54
54
|
end
|
55
55
|
|
56
|
-
it
|
57
|
-
expect(parse(
|
56
|
+
it "shows version when the flag :show_version set to true" do
|
57
|
+
expect(parse("Homo sapiens")[:scientificName][:parser_version]).
|
58
58
|
to_not be_nil
|
59
59
|
end
|
60
60
|
|
61
|
-
it
|
62
|
-
expect(parse(
|
61
|
+
it "shows version for not spelled names" do
|
62
|
+
expect(parse("not_a_name")[:scientificName][:parser_version]).to_not be_nil
|
63
63
|
end
|
64
64
|
|
65
|
-
it
|
66
|
-
expect(parse(
|
65
|
+
it "generates version for viruses" do
|
66
|
+
expect(parse("Nile virus")[:scientificName][:parser_version]).to_not be_nil
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
|
-
describe
|
70
|
+
describe "ScientificNameParser with ranked canonicals" do
|
71
71
|
before(:all) do
|
72
72
|
@parser = ScientificNameParser.new(canonical_with_rank: true)
|
73
73
|
end
|
74
74
|
|
75
|
-
it
|
75
|
+
it "does not influence output for uninomials and binomials" do
|
76
76
|
data = [
|
77
|
-
[
|
78
|
-
[
|
79
|
-
|
80
|
-
[
|
77
|
+
["Ekbainacanthus Yakowlew 1902","Ekbainacanthus"],
|
78
|
+
["Ekboarmia sagnesi herrerai Exposito 2007",
|
79
|
+
"Ekboarmia sagnesi herrerai"],
|
80
|
+
["Ekboarmia holli Oberthür", "Ekboarmia holli"]]
|
81
81
|
|
82
82
|
data.each do |d|
|
83
83
|
parsed = @parser.parse(d[0])[:scientificName][:canonical]
|
@@ -85,14 +85,14 @@ describe 'ScientificNameParser with ranked canonicals' do
|
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
-
it
|
88
|
+
it "preserves rank for ranked multinomials" do
|
89
89
|
data = [
|
90
|
-
[
|
91
|
-
|
92
|
-
[
|
93
|
-
|
94
|
-
[
|
95
|
-
|
90
|
+
["Cola cordifolia var. puberula A. Chev.",
|
91
|
+
"Cola cordifolia var. puberula"],
|
92
|
+
["Abies homolepis forma umbilicata (Mayr) Schelle",
|
93
|
+
"Abies homolepis forma umbilicata"],
|
94
|
+
["Quercus ilex ssp. ballota (Desf.) Samp",
|
95
|
+
"Quercus ilex ssp. ballota"],
|
96
96
|
["Physarum globuliferum forma. flavum Leontyev & Dudka",
|
97
97
|
"Physarum globuliferum forma. flavum"]
|
98
98
|
]
|
@@ -101,16 +101,57 @@ describe 'ScientificNameParser with ranked canonicals' do
|
|
101
101
|
expect(parsed).to eq d[1]
|
102
102
|
end
|
103
103
|
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Specs for ScientificNameParser.add_rank_to_canonical: a name is parsed with
# a default parser first, then the class method is applied to the result.
describe ".add_rank_to_canonical" do
  subject(:parser) { ScientificNameParser.new }

  it "adds rank to infraspecies with rank" do
    # Each row: verbatim name, canonical before ranking, canonical after.
    [
      ["Cola cordifolia var. puberula A. Chev.",
       "Cola cordifolia puberula",
       "Cola cordifolia var. puberula"],
      ["Abies homolepis forma umbilicata (Mayr) Schelle",
       "Abies homolepis umbilicata",
       "Abies homolepis forma umbilicata"],
      ["Quercus ilex ssp. ballota (Desf.) Samp",
       "Quercus ilex ballota",
       "Quercus ilex ssp. ballota"],
      ["Physarum globuliferum forma. flavum Leontyev & Dudka",
       "Physarum globuliferum flavum",
       "Physarum globuliferum forma. flavum"]
    ].each do |verbatim, plain, with_rank|
      result = parser.parse(verbatim)
      expect(result[:scientificName][:canonical]).to eq plain
      ScientificNameParser.add_rank_to_canonical(result)
      expect(result[:scientificName][:canonical]).to eq with_rank
    end
  end

  it "does not work for hybrids yet" do
    # Each row: verbatim hybrid name, canonical expected both before and
    # after the call.
    [
      ["Corda X cordiflora var. puberula",
       "Corda cordiflora puberula"]
    ].each do |verbatim, plain|
      result = parser.parse(verbatim)
      expect(result[:scientificName][:canonical]).to eq plain
      ScientificNameParser.add_rank_to_canonical(result)
      expect(result[:scientificName][:canonical]).to eq plain
    end
  end
end
|
106
147
|
|
107
148
|
describe ParallelParser do
|
108
|
-
it
|
149
|
+
it "finds number of cpus" do
|
109
150
|
pparser = ParallelParser.new
|
110
151
|
expect(pparser.cpu_num).to be > 0
|
111
152
|
end
|
112
153
|
|
113
|
-
it
|
154
|
+
it "parses several names in parallel" do
|
114
155
|
names = []
|
115
156
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
116
157
|
names.uniq!
|
@@ -120,7 +161,7 @@ describe ParallelParser do
|
|
120
161
|
expect(res.keys.size).to eq names.size
|
121
162
|
end
|
122
163
|
|
123
|
-
it
|
164
|
+
it "parses several names in parallel with given num of processes" do
|
124
165
|
names = []
|
125
166
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
126
167
|
names.uniq!
|
@@ -130,8 +171,8 @@ describe ParallelParser do
|
|
130
171
|
expect(res.keys.size).to eq names.size
|
131
172
|
end
|
132
173
|
|
133
|
-
it
|
134
|
-
a hash with name as a key and parsed data as value
|
174
|
+
it "has parsed name in native ruby format and in returned as \
|
175
|
+
a hash with name as a key and parsed data as value" do
|
135
176
|
names = []
|
136
177
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
137
178
|
names.uniq!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.0
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|