biodiversity 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: df9dbe0a3ecca9197bcd2bfe2a029308be56a490
4
- data.tar.gz: d5ed6ae35b3c1e6f3a6ee7656bdbf9a1c1c78bd9
3
+ metadata.gz: bd706ba900f6f136fae256b85f55223e955b4300
4
+ data.tar.gz: 00b5ee4e9b8b34d7fcb5084ae4b82e6db8635522
5
5
  SHA512:
6
- metadata.gz: 7ebe5b7d92c94636c71b594396215c81e70dd12bd40e83bb4871b8997f4620937fd573b3dedb4b6c2b5e5c03775982e55bd8006387ff298312f0de93e19a8ee3
7
- data.tar.gz: 3308bb5e6e49b4dd215f9d92a7d1a91ff14764c5ef04ff401f29b11295352b12f82aaefb2cf2c2a10feedf03ab5e38dd54c9a6dead379e59ee206bae94487f78
6
+ metadata.gz: f34fb2b4e2d7e97f748967f9cec04625416b33789287ac6de7d63de348b676d4504b4c8a631505d7074989aff2d0597b857b0843b3f45cb4c5c3f5d61e7336ca
7
+ data.tar.gz: 8886e2fccfb35456c326e461c7f48d42ef3c5f76d2044219b8e63af2c690851f91cc4c536b2727ffe0684fdf0c006425322d6d7664b7dc7fe4717e02ed17031d
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ 3.4.0 -- added ScientificNameParser.add_rank_to_canonical(parsed) method.
2
+ This method allows adding an infraspecific rank to a canonical form
3
+ after parsing is complete.
1
4
 
2
5
  3.3.0 -- parserver gets new option -- -H for host, default 127.0.0.1
3
6
 
data/README.md CHANGED
@@ -128,6 +128,15 @@ parser.parse(" Plantago major ")[:scientificName][:normalized]
128
128
  parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. \
129
129
  Braun & Crous 2003")[:scientificName][:canonical]
130
130
 
131
+ # to get the canonical form with infraspecific ranks
132
+ parsed = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
133
+ ranked = ScientificNameParser.add_rank_to_canonical(parsed)
134
+ ranked[:scientificName][:canonical]
135
+ # or:
136
+ parser = ScientificNameParser.new(canonical_with_rank: true)
137
+ ranked = parser.parse("Seddera latifolia Hochst. & Steud. var. latifolia")
138
+ ranked[:scientificName][:canonical]
139
+
131
140
  # to get detailed information about elements of the name
132
141
  parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
133
142
  Braun & Crous 2003")[:scientificName][:details]
@@ -115,6 +115,20 @@ class ScientificNameParser
115
115
  }
116
116
  end
117
117
 
118
+ def self.add_rank_to_canonical(parsed)
119
+ return parsed if parsed[:scientificName][:hybrid]
120
+ name = parsed[:scientificName]
121
+ parts = name[:canonical].split(" ")
122
+ name_ary = parts[0..1]
123
+ name[:details][0][:infraspecies].each do |data|
124
+ infrasp = data[:string]
125
+ rank = data[:rank]
126
+ name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
127
+ end
128
+ parsed[:scientificName][:canonical] = name_ary.join(" ")
129
+ parsed
130
+ end
131
+
118
132
  def self.version
119
133
  Biodiversity::VERSION
120
134
  end
@@ -230,13 +244,14 @@ class ScientificNameParser
230
244
  else
231
245
  res.merge!(self)
232
246
  end
247
+ res[:surrogate] = true if ScientificNameParser.surrogate?(res)
248
+ res = {:scientificName => res}
233
249
  if (canonical_with_rank &&
234
250
  canonical.count(" ") > 1 &&
235
- res[:details][0][:infraspecies])
251
+ res[:scientificName][:details][0][:infraspecies])
236
252
  ScientificNameParser.add_rank_to_canonical(res)
237
253
  end
238
- res[:surrogate] = true if ScientificNameParser.surrogate?(res)
239
- res = {:scientificName => res}
254
+ res
240
255
  end
241
256
 
242
257
  def @parsed.pos_json
@@ -270,15 +285,4 @@ class ScientificNameParser
270
285
  name.match(surrogate2))
271
286
  is_surrogate
272
287
  end
273
-
274
- def self.add_rank_to_canonical(parsed)
275
- parts = parsed[:canonical].split(" ")
276
- name_ary = parts[0..1]
277
- parsed[:details][0][:infraspecies].each do |data|
278
- infrasp = data[:string]
279
- rank = data[:rank]
280
- name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
281
- end
282
- parsed[:canonical] = name_ary.join(" ")
283
- end
284
288
  end
@@ -1,3 +1,3 @@
1
1
  module Biodiversity
2
- VERSION = "3.3.0"
2
+ VERSION = "3.4.0"
3
3
  end
@@ -7,23 +7,23 @@ describe ScientificNameParser do
7
7
  set_parser(ScientificNameParser.new)
8
8
  end
9
9
 
10
- it 'returns version number' do
10
+ it "returns version number" do
11
11
  expect(ScientificNameParser.version).to match /^\d+\.\d+\.\d+/
12
12
  end
13
13
 
14
- it 'fixes cases' do
14
+ it "fixes cases" do
15
15
  names = [
16
- ['QUERCUS ALBA', 'Quercus alba'],
17
- ['QUERCUS (QUERCUS) ALBA', 'Quercus (Quercus) alba'],
18
- ['QÜERCUS', 'Qüercus'],
19
- ['PARDOSA MOéSTA', 'Pardosa moésta'],
16
+ ["QUERCUS ALBA", "Quercus alba"],
17
+ ["QUERCUS (QUERCUS) ALBA", "Quercus (Quercus) alba"],
18
+ ["QÜERCUS", "Qüercus"],
19
+ ["PARDOSA MOéSTA", "Pardosa moésta"],
20
20
  ]
21
21
  names.each do |name, capitalization|
22
22
  expect(ScientificNameParser::fix_case(name)).to eq capitalization
23
23
  end
24
24
  end
25
25
 
26
- it 'generates standardized json' do
26
+ it "generates standardized json" do
27
27
  read_test_file do |y|
28
28
  expect(JSON.load(json(y[:name]))).to eq JSON.
29
29
  load(y[:jsn]) unless y[:comment]
@@ -31,9 +31,9 @@ describe ScientificNameParser do
31
31
  end
32
32
 
33
33
 
34
- # it 'generates new test_file' do
34
+ # it "generates new test_file" do
35
35
  # new_test = open(File.expand_path(dir +
36
- # '../../spec/parser/test_data_new.txt'),'w')
36
+ # "../../spec/parser/test_data_new.txt"),"w")
37
37
  # read_test_file do |y|
38
38
  # if y[:comment]
39
39
  # new_test.write y[:comment]
@@ -45,39 +45,39 @@ describe ScientificNameParser do
45
45
  # end
46
46
  # end
47
47
 
48
- it 'generates reasonable output if parser failed' do
49
- sn = 'ddd sljlkj 3223452432'
48
+ it "generates reasonable output if parser failed" do
49
+ sn = "ddd sljlkj 3223452432"
50
50
  expect(json(sn)).to eq "{\"scientificName\":" \
51
51
  "{\"id\":\"3ebf93d9-b62a-5198-8715-4c8302f0a5d7\",\"parsed\":false," \
52
52
  "\"parser_version\":\"test_version\"," \
53
53
  "\"verbatim\":\"ddd sljlkj 3223452432\"}}"
54
54
  end
55
55
 
56
- it 'shows version when the flag :show_version set to true' do
57
- expect(parse('Homo sapiens')[:scientificName][:parser_version]).
56
+ it "shows version when the flag :show_version set to true" do
57
+ expect(parse("Homo sapiens")[:scientificName][:parser_version]).
58
58
  to_not be_nil
59
59
  end
60
60
 
61
- it 'shows version for not spelled names' do
62
- expect(parse('not_a_name')[:scientificName][:parser_version]).to_not be_nil
61
+ it "shows version for not spelled names" do
62
+ expect(parse("not_a_name")[:scientificName][:parser_version]).to_not be_nil
63
63
  end
64
64
 
65
- it 'generates version for viruses' do
66
- expect(parse('Nile virus')[:scientificName][:parser_version]).to_not be_nil
65
+ it "generates version for viruses" do
66
+ expect(parse("Nile virus")[:scientificName][:parser_version]).to_not be_nil
67
67
  end
68
68
  end
69
69
 
70
- describe 'ScientificNameParser with ranked canonicals' do
70
+ describe "ScientificNameParser with ranked canonicals" do
71
71
  before(:all) do
72
72
  @parser = ScientificNameParser.new(canonical_with_rank: true)
73
73
  end
74
74
 
75
- it 'does not influence output for uninomials and binomials' do
75
+ it "does not influence output for uninomials and binomials" do
76
76
  data = [
77
- ['Ekbainacanthus Yakowlew 1902','Ekbainacanthus'],
78
- ['Ekboarmia sagnesi herrerai Exposito 2007',
79
- 'Ekboarmia sagnesi herrerai'],
80
- ['Ekboarmia holli Oberthür', 'Ekboarmia holli']]
77
+ ["Ekbainacanthus Yakowlew 1902","Ekbainacanthus"],
78
+ ["Ekboarmia sagnesi herrerai Exposito 2007",
79
+ "Ekboarmia sagnesi herrerai"],
80
+ ["Ekboarmia holli Oberthür", "Ekboarmia holli"]]
81
81
 
82
82
  data.each do |d|
83
83
  parsed = @parser.parse(d[0])[:scientificName][:canonical]
@@ -85,14 +85,14 @@ describe 'ScientificNameParser with ranked canonicals' do
85
85
  end
86
86
  end
87
87
 
88
- it 'preserves rank for ranked multinomials' do
88
+ it "preserves rank for ranked multinomials" do
89
89
  data = [
90
- ['Cola cordifolia var. puberula A. Chev.',
91
- 'Cola cordifolia var. puberula'],
92
- ['Abies homolepis forma umbilicata (Mayr) Schelle',
93
- 'Abies homolepis forma umbilicata'],
94
- ['Quercus ilex ssp. ballota (Desf.) Samp',
95
- 'Quercus ilex ssp. ballota'],
90
+ ["Cola cordifolia var. puberula A. Chev.",
91
+ "Cola cordifolia var. puberula"],
92
+ ["Abies homolepis forma umbilicata (Mayr) Schelle",
93
+ "Abies homolepis forma umbilicata"],
94
+ ["Quercus ilex ssp. ballota (Desf.) Samp",
95
+ "Quercus ilex ssp. ballota"],
96
96
  ["Physarum globuliferum forma. flavum Leontyev & Dudka",
97
97
  "Physarum globuliferum forma. flavum"]
98
98
  ]
@@ -101,16 +101,57 @@ describe 'ScientificNameParser with ranked canonicals' do
101
101
  expect(parsed).to eq d[1]
102
102
  end
103
103
  end
104
+ end
105
+
106
+ describe ".add_rank_to_canonical" do
107
+ subject(:parser) { ScientificNameParser.new }
104
108
 
109
+ it "adds rank to infraspecies with rank" do
110
+ data = [
111
+ ["Cola cordifolia var. puberula A. Chev.",
112
+ "Cola cordifolia puberula",
113
+ "Cola cordifolia var. puberula"],
114
+ ["Abies homolepis forma umbilicata (Mayr) Schelle",
115
+ "Abies homolepis umbilicata",
116
+ "Abies homolepis forma umbilicata"],
117
+ ["Quercus ilex ssp. ballota (Desf.) Samp",
118
+ "Quercus ilex ballota",
119
+ "Quercus ilex ssp. ballota"],
120
+ ["Physarum globuliferum forma. flavum Leontyev & Dudka",
121
+ "Physarum globuliferum flavum",
122
+ "Physarum globuliferum forma. flavum"]
123
+ ]
124
+ data.each do |d|
125
+ parsed = parser.parse(d[0])
126
+ canonical1 = parsed[:scientificName][:canonical]
127
+ expect(canonical1).to eq d[1]
128
+ ScientificNameParser.add_rank_to_canonical(parsed)
129
+ canonical2 = parsed[:scientificName][:canonical]
130
+ expect(canonical2).to eq d[2]
131
+ end
132
+ end
133
+
134
+ it "does not work for hybrids yet" do
135
+ data = [["Corda X cordiflora var. puberula",
136
+ "Corda cordiflora puberula"]]
137
+ data.each do |d|
138
+ parsed = parser.parse(d[0])
139
+ canonical1 = parsed[:scientificName][:canonical]
140
+ expect(canonical1).to eq d[1]
141
+ ScientificNameParser.add_rank_to_canonical(parsed)
142
+ canonical2 = parsed[:scientificName][:canonical]
143
+ expect(canonical2).to eq d[1]
144
+ end
145
+ end
105
146
  end
106
147
 
107
148
  describe ParallelParser do
108
- it 'finds number of cpus' do
149
+ it "finds number of cpus" do
109
150
  pparser = ParallelParser.new
110
151
  expect(pparser.cpu_num).to be > 0
111
152
  end
112
153
 
113
- it 'parses several names in parallel' do
154
+ it "parses several names in parallel" do
114
155
  names = []
115
156
  read_test_file { |n| names << (n[:name]) if n[:name] }
116
157
  names.uniq!
@@ -120,7 +161,7 @@ describe ParallelParser do
120
161
  expect(res.keys.size).to eq names.size
121
162
  end
122
163
 
123
- it 'parses several names in parallel with given num of processes' do
164
+ it "parses several names in parallel with given num of processes" do
124
165
  names = []
125
166
  read_test_file { |n| names << (n[:name]) if n[:name] }
126
167
  names.uniq!
@@ -130,8 +171,8 @@ describe ParallelParser do
130
171
  expect(res.keys.size).to eq names.size
131
172
  end
132
173
 
133
- it 'has parsed name in native ruby format and in returned as \
134
- a hash with name as a key and parsed data as value' do
174
+ it "has parsed name in native ruby format and in returned as \
175
+ a hash with name as a key and parsed data as value" do
135
176
  names = []
136
177
  read_test_file { |n| names << (n[:name]) if n[:name] }
137
178
  names.uniq!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-15 00:00:00.000000000 Z
11
+ date: 2015-08-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop