biodiversity 0.6.1 → 0.6.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/nnparse +10 -4
- data/spec/parser/scientific_name_clean.spec.rb +2 -0
- data/spec/parser/test_data.txt +1 -0
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.1
|
1
|
+
0.6.3
|
data/bin/nnparse
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
2
|
+
# encoding: utf-8
|
3
|
+
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
4
|
+
if ruby_min_version < 19
|
5
|
+
require 'rubygems'
|
6
|
+
gem_name = 'biodiversity'
|
7
|
+
else
|
8
|
+
gem_name = 'biodiversity19'
|
9
|
+
end
|
4
10
|
gem gem_name rescue nil
|
5
11
|
|
6
12
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
@@ -19,13 +25,13 @@ end
|
|
19
25
|
input = ARGV[0]
|
20
26
|
output = ARGV[1] || 'parsed.json'
|
21
27
|
|
22
|
-
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
23
28
|
|
24
29
|
p = ScientificNameParser.new
|
25
30
|
o = open(output, 'w')
|
26
31
|
count = 0
|
27
32
|
puts 'Parsing...'
|
28
|
-
|
33
|
+
f = ruby_min_version < 19 ? open(input) : open(input, 'r:utf-8')
|
34
|
+
f.each do |line|
|
29
35
|
count += 1
|
30
36
|
puts("%s lines parsed" % count) if count % 10000 == 0
|
31
37
|
name = line.gsub(/^[\d]*\s*/, '').strip
|
@@ -516,6 +516,8 @@ describe ScientificNameClean do
|
|
516
516
|
details(sn).should == [{:genus=>{:string=>"Flexibacter"}, :species=>{:string=>"elegans", :authorship=>"Lewin 1969 non Soriano 1945", :basionymAuthorTeam=>{:authorTeam=>"Lewin", :author=>["Lewin"], :year=>"1969"}}}]
|
517
517
|
sn = 'Flexibacter elegans Soriano 1945, non Lewin 1969'
|
518
518
|
details(sn).should == [{:genus=>{:string=>"Flexibacter"}, :species=>{:string=>"elegans", :authorship=>"Soriano 1945, non Lewin 1969", :basionymAuthorTeam=>{:authorTeam=>"Soriano", :author=>["Soriano"], :year=>"1945"}}}]
|
519
|
+
sn = 'Schottera nicaeënsis (J.V. Lamouroux ex Duby) Guiry & Hollenberg'
|
520
|
+
details(sn).should == [{:genus=>{:string=>"Schottera"}, :species=>{:string=>"nicaeënsis", :authorship=>"(J.V. Lamouroux ex Duby) Guiry & Hollenberg", :combinationAuthorTeam=>{:authorTeam=>"Guiry & Hollenberg", :author=>["Guiry", "Hollenberg"]}, :basionymAuthorTeam=>{:authorTeam=>"J.V. Lamouroux", :author=>["J.V. Lamouroux"], :exAuthorTeam=>{:authorTeam=>"Duby", :author=>["Duby"]}}}}]
|
519
521
|
end
|
520
522
|
|
521
523
|
# Combination genus names should be merged without dash or capital letter
|
data/spec/parser/test_data.txt
CHANGED
@@ -53,6 +53,7 @@ Platypus bicaudatulus Schedl, 1935h|{"scientificName":{"parsed":true,"parser_run
|
|
53
53
|
Pseudocercospora dendrobii U. Braun & Crous|{"scientificName":{"parsed":true,"parser_run":1,"verbatim":"Pseudocercospora dendrobii U. Braun & Crous","normalized":"Pseudocercospora dendrobii U. Braun et Crous","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"U. Braun & Crous","basionymAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"]}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",29],"30":["author_word",35],"38":["author_word",43]}}}
|
54
54
|
Pseudocercospora dendrobii U. Braun et Crous|{"scientificName":{"parsed":true,"parser_run":1,"verbatim":"Pseudocercospora dendrobii U. Braun et Crous","normalized":"Pseudocercospora dendrobii U. Braun et Crous","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"U. Braun et Crous","basionymAuthorTeam":{"authorTeam":"U. Braun et Crous","author":["U. Braun","Crous"]}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",29],"30":["author_word",35],"39":["author_word",44]}}}
|
55
55
|
Pseudocercospora dendrobii U. Braun and Crous|{"scientificName":{"parsed":true,"parser_run":1,"verbatim":"Pseudocercospora dendrobii U. Braun and Crous","normalized":"Pseudocercospora dendrobii U. Braun et Crous","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"U. Braun and Crous","basionymAuthorTeam":{"authorTeam":"U. Braun and Crous","author":["U. Braun","Crous"]}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",29],"30":["author_word",35],"40":["author_word",45]}}}
|
56
|
+
Schottera nicaeënsis (J.V. Lamouroux ex Duby) Guiry & Hollenberg|{"scientificName":{"parsed":true, "verbatim":"Schottera nicaeënsis (J.V. Lamouroux ex Duby) Guiry & Hollenberg", "normalized":"Schottera nicaeënsis (J.V. Lamouroux ex Duby) Guiry et Hollenberg", "canonical":"Schottera nicaeënsis", "hybrid":false, "details":[{"genus":{"string":"Schottera"}, "species":{"string":"nicaeënsis", "authorship":"(J.V. Lamouroux ex Duby) Guiry & Hollenberg", "combinationAuthorTeam":{"authorTeam":"Guiry & Hollenberg", "author":["Guiry", "Hollenberg"]}, "basionymAuthorTeam":{"authorTeam":"J.V. Lamouroux", "author":["J.V. Lamouroux"], "exAuthorTeam":{"authorTeam":"Duby", "author":["Duby"]}}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 20], "22":["author_word", 26], "27":["author_word", 36], "40":["author_word", 44], "46":["author_word", 51], "54":["author_word", 64]}}}
|
56
57
|
|
57
58
|
#binomial with several authors and a year
|
58
59
|
Pseudocercospora dendrobii U. Braun & Crous 2003|{"scientificName":{"parsed":true,"parser_run":1,"verbatim":"Pseudocercospora dendrobii U. Braun & Crous 2003","normalized":"Pseudocercospora dendrobii U. Braun et Crous 2003","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"U. Braun & Crous 2003","basionymAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"],"year":"2003"}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",29],"30":["author_word",35],"38":["author_word",43],"44":["year",48]}}}
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 6
|
9
|
-
- 1
|
10
|
-
version: 0.6.1
|
9
|
+
- 3
|
10
|
+
version: 0.6.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-26 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|