biodiversity 3.1.2 → 3.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/.travis.yml +4 -3
- data/Gemfile +1 -13
- data/Rakefile +16 -34
- data/bin/nnparse +8 -19
- data/bin/parserver +1 -2
- data/biodiversity.gemspec +28 -0
- data/lib/biodiversity.rb +12 -6
- data/lib/biodiversity/guid.rb +1 -2
- data/lib/biodiversity/guid/lsid.rb +2 -4
- data/lib/biodiversity/parser.rb +5 -12
- data/lib/biodiversity/version.rb +3 -0
- data/spec/biodiversity_spec.rb +9 -0
- data/spec/guid/lsid.spec.rb +5 -7
- data/spec/parser/scientific_name_canonical_spec.rb +37 -0
- data/spec/parser/{scientific_name_clean.spec.rb → scientific_name_clean_spec.rb} +38 -10
- data/spec/parser/{scientific_name_dirty.spec.rb → scientific_name_dirty_spec.rb} +8 -3
- data/spec/parser/{scientific_name.spec.rb → scientific_name_spec.rb} +16 -17
- data/spec/spec_helper.rb +82 -0
- metadata +45 -84
- data/.rvmrc +0 -1
- data/Gemfile.lock +0 -47
- data/VERSION +0 -1
- data/conf/environment.rb +0 -3
- data/pkg/.gitignore +0 -0
- data/spec/parser/scientific_name_canonical.spec.rb +0 -26
- data/spec/parser/spec_helper.rb +0 -70
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: c6dd51dda2a710e62e09554051bf9f26569e8f2c
|
|
4
|
+
data.tar.gz: 24d44fde0d6d582f485a45cfb0c809c22793aac3
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5e5505a4a8dd3980eac1f126886bb6d59840c2380149d0690666311445a3875f0693dd23b431d96e96736859e40fe4758e8e542e7cf11131cb83f4e3c36400b5
|
|
7
|
+
data.tar.gz: f33859933760acd98e440df9b059473f651eaa918d5cb799f855d506793d1877456d9bbf4eb42dc949676a2342b79086190b6faf8831ad05f2e39abbee0ff23a
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.0.0-p353
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
|
@@ -1,15 +1,3 @@
|
|
|
1
1
|
source 'https://rubygems.org'
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
gem 'treetop', '~> 1.4'
|
|
5
|
-
gem 'parallel', '~> 0.6'
|
|
6
|
-
gem 'unicode_utils', '~> 1.4'
|
|
7
|
-
|
|
8
|
-
group :development do
|
|
9
|
-
gem 'debugger', '~> 1.5'
|
|
10
|
-
gem 'jeweler', '~> 1.8'
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
group :test do
|
|
14
|
-
gem 'rspec', '~> 2.13'
|
|
15
|
-
end
|
|
3
|
+
gemspec
|
data/Rakefile
CHANGED
|
@@ -1,12 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
require 'bundler'
|
|
2
|
+
Bundler::GemHelper.install_tasks
|
|
3
|
+
|
|
4
|
+
begin
|
|
5
|
+
Bundler.setup(:default, :development)
|
|
6
|
+
rescue Bundler::BundlerError => e
|
|
7
|
+
$stderr.puts e.message
|
|
8
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
|
9
|
+
exit e.status_code
|
|
10
|
+
end
|
|
11
|
+
|
|
5
12
|
require 'rspec/core'
|
|
6
13
|
require 'rspec/core/rake_task'
|
|
14
|
+
require 'rake/dsl_definition'
|
|
15
|
+
require 'rake'
|
|
16
|
+
require 'rspec'
|
|
17
|
+
require 'rspec/core/rake_task'
|
|
7
18
|
|
|
8
|
-
#Gem::manage_gems
|
|
9
|
-
#require 'rake/gempackagetask'
|
|
10
19
|
|
|
11
20
|
task :default => :spec
|
|
12
21
|
|
|
@@ -14,35 +23,8 @@ RSpec::Core::RakeTask.new do |t|
|
|
|
14
23
|
t.pattern = 'spec/**/*spec.rb'
|
|
15
24
|
end
|
|
16
25
|
|
|
17
|
-
ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
begin
|
|
21
|
-
require 'jeweler'
|
|
22
|
-
Jeweler::Tasks.new do |gem|
|
|
23
|
-
gem.name = 'biodiversity'
|
|
24
|
-
#To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
|
|
25
|
-
gem.summary = 'Parser of scientific names'
|
|
26
|
-
gem.description = 'Tools for biodiversity informatics'
|
|
27
|
-
gem.email = 'dmozzherin@gmail.com'
|
|
28
|
-
gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
|
|
29
|
-
gem.authors = ['Dmitry Mozzherin']
|
|
30
|
-
gem.has_rdoc = false
|
|
31
|
-
gem.bindir = 'bin'
|
|
32
|
-
gem.executables = ['nnparse', 'parserver']
|
|
33
|
-
gem.add_dependency('treetop')
|
|
34
|
-
gem.add_dependency('parallel')
|
|
35
|
-
# gem.add_dependency('json') if ruby_version < 19
|
|
36
|
-
gem.add_development_dependency "rspec"
|
|
37
|
-
# gem is a Gem::Specification...
|
|
38
|
-
# see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
39
|
-
end
|
|
40
|
-
rescue LoadError
|
|
41
|
-
puts 'Jeweler (or a dependency) not available. ' +
|
|
42
|
-
'Install it with: sudo gem install jeweler'
|
|
43
|
-
end
|
|
44
|
-
|
|
45
26
|
task :tt do
|
|
27
|
+
dir = File.dirname(__FILE__)
|
|
46
28
|
['scientific_name_clean',
|
|
47
29
|
'scientific_name_dirty',
|
|
48
30
|
'scientific_name_canonical'].each do |f|
|
data/bin/nnparse
CHANGED
|
@@ -1,24 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
# encoding: utf-8
|
|
3
|
-
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
|
4
|
-
if ruby_min_version < 19
|
|
5
|
-
require 'rubygems'
|
|
6
|
-
gem_name = 'biodiversity'
|
|
7
|
-
else
|
|
8
|
-
gem_name = 'biodiversity19'
|
|
9
|
-
end
|
|
10
|
-
gem gem_name rescue nil
|
|
11
3
|
|
|
12
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
13
4
|
require 'biodiversity'
|
|
14
5
|
require 'json'
|
|
15
6
|
|
|
16
7
|
def parser_error(name)
|
|
17
|
-
{'scientificName' =>
|
|
8
|
+
{ 'scientificName' =>
|
|
9
|
+
{ 'parsed' => false,
|
|
10
|
+
'verbatim' => name,
|
|
11
|
+
'error' => 'Parser error' } }.to_json
|
|
18
12
|
end
|
|
19
13
|
|
|
20
14
|
if ARGV.empty?
|
|
21
|
-
puts "Usage:\n\
|
|
15
|
+
puts "Usage:\n\n" +
|
|
16
|
+
"nnparse file_with_scientific_names [output_file]\n\n" +
|
|
17
|
+
"default output_file is parsed.json\n\n"
|
|
22
18
|
exit
|
|
23
19
|
end
|
|
24
20
|
|
|
@@ -30,21 +26,14 @@ p = ScientificNameParser.new
|
|
|
30
26
|
o = open(output, 'w')
|
|
31
27
|
count = 0
|
|
32
28
|
puts 'Parsing...'
|
|
33
|
-
f =
|
|
29
|
+
f = open(input, 'r:utf-8')
|
|
34
30
|
f.each do |line|
|
|
35
31
|
count += 1
|
|
36
32
|
puts("%s lines parsed" % count) if count % 10000 == 0
|
|
37
33
|
name = line.gsub(/^[\d]*\s*/, '').strip
|
|
38
34
|
begin
|
|
39
|
-
if ruby_min_version < 19
|
|
40
|
-
old_kcode = $KCODE
|
|
41
|
-
$KCODE = 'NONE'
|
|
42
|
-
end
|
|
43
35
|
p.parse(name)
|
|
44
36
|
parsed_data = p.parsed.all_json rescue parser_error(name)
|
|
45
|
-
if ruby_min_version < 19
|
|
46
|
-
$KCODE = old_kcode
|
|
47
|
-
end
|
|
48
37
|
rescue
|
|
49
38
|
parsed_data = parser_error(name)
|
|
50
39
|
end
|
data/bin/parserver
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'biodiversity/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |gem|
|
|
6
|
+
gem.name = 'biodiversity'
|
|
7
|
+
gem.version = Biodiversity::VERSION
|
|
8
|
+
gem.homepage = 'https://github.com/GlobalNamesArchitecture/biodiversity'
|
|
9
|
+
gem.license = 'MIT'
|
|
10
|
+
gem.summary = %Q{Parser of scientific names}
|
|
11
|
+
gem.description = %Q{Tools for biodiversity informatics}
|
|
12
|
+
gem.authors = ['Dmitry Mozzherin']
|
|
13
|
+
gem.email = 'dmozzherin@gmail.com'
|
|
14
|
+
|
|
15
|
+
gem.files = `git ls-files`.split("\n")
|
|
16
|
+
gem.executables = ['nnparse', 'parserver']
|
|
17
|
+
gem.require_paths = ['lib']
|
|
18
|
+
|
|
19
|
+
gem.add_runtime_dependency 'treetop', '~> 1.4'
|
|
20
|
+
gem.add_runtime_dependency 'parallel', '~> 0.7'
|
|
21
|
+
gem.add_runtime_dependency 'unicode_utils', '~> 1.4'
|
|
22
|
+
|
|
23
|
+
gem.add_development_dependency 'bundler', '~> 1.3'
|
|
24
|
+
gem.add_development_dependency 'rake', '~> 10.1'
|
|
25
|
+
gem.add_development_dependency 'rspec', '~> 2.14'
|
|
26
|
+
gem.add_development_dependency 'rr', '~> 1.1'
|
|
27
|
+
gem.add_development_dependency 'debugger', '~> 1.6'
|
|
28
|
+
end
|
data/lib/biodiversity.rb
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
require 'rubygems'
|
|
2
1
|
require 'treetop'
|
|
2
|
+
require 'json'
|
|
3
|
+
require 'open-uri'
|
|
4
|
+
require_relative 'biodiversity/version'
|
|
5
|
+
require_relative 'biodiversity/parser'
|
|
6
|
+
require_relative 'biodiversity/guid'
|
|
3
7
|
|
|
4
|
-
|
|
8
|
+
module Biodiversity
|
|
9
|
+
LSID_RESOLVER_URL = 'http://lsid.tdwg.org/'
|
|
10
|
+
|
|
11
|
+
def self.version
|
|
12
|
+
VERSION
|
|
13
|
+
end
|
|
14
|
+
end
|
|
5
15
|
|
|
6
|
-
BIODIVERSITY_ROOT = File.join(dir, 'biodiversity')
|
|
7
|
-
require File.join(dir, "/../conf/environment")
|
|
8
|
-
require File.join(BIODIVERSITY_ROOT, "parser")
|
|
9
|
-
require File.join(BIODIVERSITY_ROOT, "guid")
|
data/lib/biodiversity/guid.rb
CHANGED
|
@@ -1,2 +1 @@
|
|
|
1
|
-
|
|
2
|
-
require File.join(dir, *%w[guid lsid])
|
|
1
|
+
require_relative 'guid/lsid'
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
require 'open-uri'
|
|
2
|
-
|
|
3
1
|
class LsidResolver
|
|
4
2
|
def self.resolve(lsid)
|
|
5
3
|
http_get_rdf(lsid)
|
|
@@ -8,11 +6,11 @@ class LsidResolver
|
|
|
8
6
|
protected
|
|
9
7
|
def self.http_get_rdf(lsid)
|
|
10
8
|
rdf = ''
|
|
11
|
-
open(LSID_RESOLVER_URL + lsid) do |f|
|
|
9
|
+
open(Biodiversity::LSID_RESOLVER_URL + lsid) do |f|
|
|
12
10
|
f.each do |line|
|
|
13
11
|
rdf += line if !line.strip.blank?
|
|
14
12
|
end
|
|
15
13
|
end
|
|
16
14
|
rdf
|
|
17
15
|
end
|
|
18
|
-
end
|
|
16
|
+
end
|
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
require File.join(dir, *%w[parser scientific_name_canonical])
|
|
6
|
-
require 'rubygems'
|
|
7
|
-
require 'json'
|
|
2
|
+
require_relative 'parser/scientific_name_clean'
|
|
3
|
+
require_relative 'parser/scientific_name_dirty'
|
|
4
|
+
require_relative 'parser/scientific_name_canonical'
|
|
8
5
|
|
|
9
6
|
module PreProcessor
|
|
10
7
|
NOTES = /\s+(species\s+group|species\s+complex|group|author)\b.*$/i
|
|
@@ -108,10 +105,6 @@ end
|
|
|
108
105
|
# end
|
|
109
106
|
|
|
110
107
|
class ScientificNameParser
|
|
111
|
-
VERSION = open(File.join(File.dirname(__FILE__),
|
|
112
|
-
'..',
|
|
113
|
-
'..',
|
|
114
|
-
'VERSION')).readline.strip
|
|
115
108
|
|
|
116
109
|
FAILED_RESULT = ->(name) do
|
|
117
110
|
{ scientificName:
|
|
@@ -120,7 +113,7 @@ class ScientificNameParser
|
|
|
120
113
|
end
|
|
121
114
|
|
|
122
115
|
def self.version
|
|
123
|
-
VERSION
|
|
116
|
+
Biodiversity::VERSION
|
|
124
117
|
end
|
|
125
118
|
|
|
126
119
|
def self.fix_case(name_string)
|
|
@@ -213,7 +206,7 @@ class ScientificNameParser
|
|
|
213
206
|
def @parsed.all(opts = {})
|
|
214
207
|
canonical_with_rank = !!opts[:canonical_with_rank]
|
|
215
208
|
parsed = self.class != Hash
|
|
216
|
-
res = { parsed: parsed, parser_version: ScientificNameParser::
|
|
209
|
+
res = { parsed: parsed, parser_version: ScientificNameParser::version}
|
|
217
210
|
if parsed
|
|
218
211
|
hybrid = self.hybrid rescue false
|
|
219
212
|
res.merge!({
|
data/spec/biodiversity_spec.rb
CHANGED
data/spec/guid/lsid.spec.rb
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
require 'rubygems'
|
|
3
|
-
require File.expand_path(dir + "../../conf/environment")
|
|
4
|
-
require File.expand_path(dir + "../../lib/biodiversity/guid")
|
|
1
|
+
require_relative '../spec_helper'
|
|
5
2
|
|
|
6
3
|
describe LsidResolver do
|
|
7
|
-
it
|
|
8
|
-
lsid =
|
|
9
|
-
LsidResolver.resolve(lsid)
|
|
4
|
+
it 'should return RFD document from lsid' do
|
|
5
|
+
lsid = 'urn:lsid:ubio.org:classificationbank:2232671'
|
|
6
|
+
stub(LsidResolver).resolve(lsid) {''}
|
|
7
|
+
LsidResolver.resolve(lsid).class.should == String
|
|
10
8
|
end
|
|
11
9
|
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
require_relative '../spec_helper'
|
|
3
|
+
|
|
4
|
+
describe ScientificNameCanonical do
|
|
5
|
+
before(:all) do
|
|
6
|
+
set_parser(ScientificNameCanonicalParser.new)
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it 'should parse names with valid name part and unparseable rest' do
|
|
10
|
+
[
|
|
11
|
+
['Morea ssjjlajajaj324$33 234243242','Morea',
|
|
12
|
+
[{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
|
|
13
|
+
['Morea (Morea) Burt 2342343242 23424322342 23424234',
|
|
14
|
+
'Morea (Morea)', [{:genus=>{:string=>"Morea"},
|
|
15
|
+
:infragenus=>{:string=>"Morea"}}],
|
|
16
|
+
{0=>["genus", 5], 7=>["infragenus", 12]}],
|
|
17
|
+
['Morea (Morea) burtius 2342343242 23424322342 23424234',
|
|
18
|
+
'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"},
|
|
19
|
+
:infragenus=>{:string=>"Morea"},
|
|
20
|
+
:species=>{:string=>"burtius"}}],
|
|
21
|
+
{0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
|
|
22
|
+
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',
|
|
23
|
+
[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}],
|
|
24
|
+
{0=>["genus", 6], 7=>["species", 17]} ],
|
|
25
|
+
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',
|
|
26
|
+
[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}],
|
|
27
|
+
{0=>["genus", 11], 12=>["species", 20]}]
|
|
28
|
+
].each do |n|
|
|
29
|
+
parse(n[0]).should_not be_nil
|
|
30
|
+
value(n[0]).should == n[1]
|
|
31
|
+
details(n[0]).should == n[2]
|
|
32
|
+
pos(n[0]).should == n[3]
|
|
33
|
+
parse(n[0]).hybrid.should be_false
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
end
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
|
-
|
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
|
2
|
+
require_relative '../spec_helper'
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
describe ScientificNameClean do
|
|
@@ -20,19 +19,36 @@ describe ScientificNameClean do
|
|
|
20
19
|
it 'should parse uninomial with author and year' do
|
|
21
20
|
sn = 'Pseudocercospora Speg.'
|
|
22
21
|
parse(sn).should_not be_nil
|
|
23
|
-
details(sn).should == [{:uninomial=>
|
|
22
|
+
details(sn).should == [{:uninomial=>
|
|
23
|
+
{:string=>"Pseudocercospora",
|
|
24
|
+
:authorship=>"Speg.",
|
|
25
|
+
:basionymAuthorTeam=>
|
|
26
|
+
{:authorTeam=>"Speg.", :author=>["Speg."]}}}]
|
|
24
27
|
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
|
25
28
|
sn = 'Pseudocercospora Spegazzini, 1910'
|
|
26
29
|
parse(sn).should_not be_nil
|
|
27
30
|
value(sn).should == 'Pseudocercospora Spegazzini 1910'
|
|
28
|
-
details(sn).should == [{:uninomial=>
|
|
29
|
-
|
|
31
|
+
details(sn).should == [{:uninomial=>
|
|
32
|
+
{:string=>"Pseudocercospora",
|
|
33
|
+
:authorship=>"Spegazzini, 1910",
|
|
34
|
+
:basionymAuthorTeam=>
|
|
35
|
+
{:authorTeam=>"Spegazzini",
|
|
36
|
+
:author=>["Spegazzini"], :year=>"1910"}}}]
|
|
37
|
+
pos(sn).should == {0=>["uninomial", 16],
|
|
38
|
+
17=>["author_word", 27], 29=>["year", 33]}
|
|
30
39
|
end
|
|
31
40
|
|
|
32
41
|
it "should parse uninomials with uninomial ranks" do
|
|
33
42
|
sn = "Epacridaceae trib. Archerieae Crayn & Quinn"
|
|
34
43
|
parse(sn).should_not be_nil
|
|
35
|
-
details(sn).should == [{:uninomial=>
|
|
44
|
+
details(sn).should == [{:uninomial=>
|
|
45
|
+
{:string=>"Epacridaceae"},
|
|
46
|
+
:rank_uninomials=>"trib.",
|
|
47
|
+
:uninomial2=>{:string=>"Archerieae",
|
|
48
|
+
:authorship=>"Crayn & Quinn",
|
|
49
|
+
:basionymAuthorTeam=>
|
|
50
|
+
{:authorTeam=>"Crayn & Quinn",
|
|
51
|
+
:author=>["Crayn", "Quinn"]}}}]
|
|
36
52
|
end
|
|
37
53
|
|
|
38
54
|
it 'should parse names with a valid 2 letter genus' do
|
|
@@ -65,7 +81,9 @@ describe ScientificNameClean do
|
|
|
65
81
|
parse(sn).should_not be_nil
|
|
66
82
|
value(sn).should == 'Pseudocercospora dendrobii'
|
|
67
83
|
canonical(sn).should == 'Pseudocercospora dendrobii'
|
|
68
|
-
details(sn).should == [{:genus=>
|
|
84
|
+
details(sn).should == [{:genus=>
|
|
85
|
+
{:string=>"Pseudocercospora"},
|
|
86
|
+
:species=>{:string=>"dendrobii"}}]
|
|
69
87
|
pos(sn).should == {0=>["genus", 16], 21=>["species", 30]}
|
|
70
88
|
end
|
|
71
89
|
|
|
@@ -76,7 +94,9 @@ describe ScientificNameClean do
|
|
|
76
94
|
sn = 'Ps. dendrobii'
|
|
77
95
|
parse(sn).should_not be_nil
|
|
78
96
|
value(sn).should == 'Ps. dendrobii'
|
|
79
|
-
details(sn).should == [{:genus=>
|
|
97
|
+
details(sn).should == [{:genus=>
|
|
98
|
+
{:string=>"Ps."},
|
|
99
|
+
:species=>{:string=>"dendrobii"}}]
|
|
80
100
|
end
|
|
81
101
|
|
|
82
102
|
|
|
@@ -87,8 +107,16 @@ describe ScientificNameClean do
|
|
|
87
107
|
sn = "Platypus bicaudatulus Schedl, 1935h"
|
|
88
108
|
parse(sn).should_not be_nil
|
|
89
109
|
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
|
90
|
-
details(sn).should == [{:genus=>
|
|
91
|
-
|
|
110
|
+
details(sn).should == [{:genus=>
|
|
111
|
+
{:string=>"Platypus"},
|
|
112
|
+
:species=>{:string=>"bicaudatulus",
|
|
113
|
+
:authorship=>"Schedl, 1935h",
|
|
114
|
+
:basionymAuthorTeam=>
|
|
115
|
+
{:authorTeam=>"Schedl", :author=>["Schedl"],
|
|
116
|
+
:year=>"1935"}}}]
|
|
117
|
+
pos(sn).should == {0=>["genus", 8],
|
|
118
|
+
9=>["species", 21], 22=>["author_word", 28],
|
|
119
|
+
30=>["year", 35]}
|
|
92
120
|
parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
|
|
93
121
|
sn = "Platypus bicaudatulus Schedl (1935h)"
|
|
94
122
|
parse(sn).should_not be_nil
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
|
-
|
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
|
2
|
+
require_relative '../spec_helper'
|
|
4
3
|
|
|
5
4
|
describe ScientificNameDirty do
|
|
6
5
|
before(:all) do
|
|
@@ -15,7 +14,13 @@ describe ScientificNameDirty do
|
|
|
15
14
|
sn = "Eichornia crassipes ( (Martius) ) Solms-Laub."
|
|
16
15
|
parse(sn).should_not be_nil
|
|
17
16
|
value(sn).should == "Eichornia crassipes (Martius) Solms-Laub."
|
|
18
|
-
details(sn).should == [{:genus=>{:string=>"Eichornia"},
|
|
17
|
+
details(sn).should == [{:genus=>{:string=>"Eichornia"},
|
|
18
|
+
:species=>{:string=>"crassipes",
|
|
19
|
+
:authorship=>"( (Martius) ) Solms-Laub.",
|
|
20
|
+
:combinationAuthorTeam=>{:authorTeam=>"Solms-Laub.",
|
|
21
|
+
:author=>["Solms-Laub."]},
|
|
22
|
+
:basionymAuthorTeam=>{:authorTeam=>"Martius",
|
|
23
|
+
:author=>["Martius"]}}}]
|
|
19
24
|
pos(sn).should == {0=>["genus", 9], 10=>["species", 19], 23=>["author_word", 30], 34=>["author_word", 45]}
|
|
20
25
|
end
|
|
21
26
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# encoding: utf-8
|
|
2
|
+
|
|
2
3
|
#NOTE: this spec needs compiled treetop files.
|
|
3
|
-
|
|
4
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
|
5
|
-
require File.expand_path(dir + '../../lib/biodiversity/parser')
|
|
4
|
+
require_relative '../spec_helper'
|
|
6
5
|
|
|
7
6
|
describe ScientificNameParser do
|
|
8
7
|
before(:all) do
|
|
@@ -15,10 +14,10 @@ describe ScientificNameParser do
|
|
|
15
14
|
|
|
16
15
|
it 'should ScientificNameParser::fix_case' do
|
|
17
16
|
names = [
|
|
18
|
-
[
|
|
19
|
-
[
|
|
20
|
-
[
|
|
21
|
-
[
|
|
17
|
+
['QUERCUS ALBA', 'Quercus alba'],
|
|
18
|
+
['QUERCUS (QUERCUS) ALBA', 'Quercus (Quercus) alba'],
|
|
19
|
+
['QÜERCUS', 'Qüercus'],
|
|
20
|
+
['PARDOSA MOéSTA', 'Pardosa moésta'],
|
|
22
21
|
]
|
|
23
22
|
names.each do |name, capitalization|
|
|
24
23
|
ScientificNameParser::fix_case(name).should == capitalization
|
|
@@ -34,7 +33,7 @@ describe ScientificNameParser do
|
|
|
34
33
|
|
|
35
34
|
# it 'should generate new test_file' do
|
|
36
35
|
# new_test = open(File.expand_path(dir +
|
|
37
|
-
#
|
|
36
|
+
# '../../spec/parser/test_data_new.txt'),'w')
|
|
38
37
|
# read_test_file do |y|
|
|
39
38
|
# if y[:comment]
|
|
40
39
|
# new_test.write y[:comment]
|
|
@@ -52,20 +51,20 @@ describe ScientificNameParser do
|
|
|
52
51
|
'"parser_version":"test_version","verbatim":"ddd sljlkj 3223452432"}}'
|
|
53
52
|
end
|
|
54
53
|
|
|
55
|
-
it
|
|
54
|
+
it 'should show version when the flag :show_version set to true' do
|
|
56
55
|
parse('Homo sapiens')[:scientificName][:parser_version].should_not be_nil
|
|
57
56
|
end
|
|
58
57
|
|
|
59
|
-
it
|
|
58
|
+
it 'should show version for not spelled names' do
|
|
60
59
|
parse('not_a_name')[:scientificName][:parser_version].should_not be_nil
|
|
61
60
|
end
|
|
62
61
|
|
|
63
|
-
it
|
|
62
|
+
it 'should generate version for viruses' do
|
|
64
63
|
parse('Nile virus')[:scientificName][:parser_version].should_not be_nil
|
|
65
64
|
end
|
|
66
65
|
end
|
|
67
66
|
|
|
68
|
-
describe
|
|
67
|
+
describe 'ScientificNameParser with ranked canonicals' do
|
|
69
68
|
before(:all) do
|
|
70
69
|
@parser = ScientificNameParser.new(canonical_with_rank: true)
|
|
71
70
|
end
|
|
@@ -101,12 +100,12 @@ describe "ScientificNameParser with ranked canonicals" do
|
|
|
101
100
|
end
|
|
102
101
|
|
|
103
102
|
describe ParallelParser do
|
|
104
|
-
it
|
|
103
|
+
it 'should find number of cpus' do
|
|
105
104
|
pparser = ParallelParser.new
|
|
106
105
|
pparser.cpu_num.should > 0
|
|
107
106
|
end
|
|
108
107
|
|
|
109
|
-
it
|
|
108
|
+
it 'should parse several names in parallel' do
|
|
110
109
|
names = []
|
|
111
110
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
112
111
|
names.uniq!
|
|
@@ -116,7 +115,7 @@ describe ParallelParser do
|
|
|
116
115
|
res.keys.size.should == names.size
|
|
117
116
|
end
|
|
118
117
|
|
|
119
|
-
it
|
|
118
|
+
it 'should parse several names in parallel with given num of processes' do
|
|
120
119
|
names = []
|
|
121
120
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
122
121
|
names.uniq!
|
|
@@ -126,8 +125,8 @@ describe ParallelParser do
|
|
|
126
125
|
res.keys.size.should == names.size
|
|
127
126
|
end
|
|
128
127
|
|
|
129
|
-
it
|
|
130
|
-
a hash with name as a key and parsed data as value
|
|
128
|
+
it 'should have parsed name in native ruby format and in returned as \
|
|
129
|
+
a hash with name as a key and parsed data as value' do
|
|
131
130
|
names = []
|
|
132
131
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
|
133
132
|
names.uniq!
|
data/spec/spec_helper.rb
CHANGED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
require 'treetop'
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'biodiversity'
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
RSpec.configure do |c|
|
|
8
|
+
c.mock_with :rr
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
dir = File.dirname(__FILE__)
|
|
12
|
+
Treetop.load(File.expand_path(File.join(dir,
|
|
13
|
+
'../lib/biodiversity/parser/scientific_name_clean')))
|
|
14
|
+
Treetop.load(File.expand_path(File.join(dir,
|
|
15
|
+
'../lib/biodiversity/parser/scientific_name_dirty')))
|
|
16
|
+
Treetop.load(File.expand_path(File.join(dir,
|
|
17
|
+
'../lib/biodiversity/parser/scientific_name_canonical')))
|
|
18
|
+
|
|
19
|
+
PARSER_TEST_VERSION = 'test_version'
|
|
20
|
+
|
|
21
|
+
def set_parser(parser)
|
|
22
|
+
@parser = parser
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def parse(input)
|
|
26
|
+
@parser.parse(input)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def value(input)
|
|
30
|
+
parse(input).value
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def canonical(input)
|
|
34
|
+
parse(input).canonical
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def details(input)
|
|
38
|
+
parse(input).details
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def pos(input)
|
|
42
|
+
parse(input).pos
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def json(input)
|
|
46
|
+
parse(input).
|
|
47
|
+
to_json.gsub(/"parser_version":"[^"]*"/,
|
|
48
|
+
%Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def debug(input)
|
|
52
|
+
res = parse(input)
|
|
53
|
+
puts '<pre>'
|
|
54
|
+
if res
|
|
55
|
+
puts 'success!'
|
|
56
|
+
puts res.inspect
|
|
57
|
+
else
|
|
58
|
+
puts input
|
|
59
|
+
val = @parser.failure_reason.to_s.match(/column [0-9]*/).
|
|
60
|
+
to_s.gsub(/column /,'').to_i
|
|
61
|
+
print ('-' * (val - 1))
|
|
62
|
+
print "^ Computer says 'no'!\n"
|
|
63
|
+
puts @parser.failure_reason
|
|
64
|
+
puts @parser.to_yaml
|
|
65
|
+
end
|
|
66
|
+
puts '</pre>'
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def read_test_file
|
|
70
|
+
f = open(File.expand_path(File.join(File.dirname(__FILE__),
|
|
71
|
+
'parser/test_data.txt')))
|
|
72
|
+
f.each do |line|
|
|
73
|
+
name, jsn = line.split("|")
|
|
74
|
+
if line.match(/^\s*#/) == nil && name && jsn
|
|
75
|
+
yield({:name => name, :jsn => jsn})
|
|
76
|
+
else
|
|
77
|
+
yield({:comment => line})
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
|
metadata
CHANGED
|
@@ -1,36 +1,18 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.1.
|
|
5
|
-
prerelease:
|
|
4
|
+
version: 3.1.3
|
|
6
5
|
platform: ruby
|
|
7
6
|
authors:
|
|
8
7
|
- Dmitry Mozzherin
|
|
9
8
|
autorequire:
|
|
10
9
|
bindir: bin
|
|
11
10
|
cert_chain: []
|
|
12
|
-
date: 2013-
|
|
11
|
+
date: 2013-12-18 00:00:00.000000000 Z
|
|
13
12
|
dependencies:
|
|
14
|
-
- !ruby/object:Gem::Dependency
|
|
15
|
-
name: rake
|
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
|
17
|
-
none: false
|
|
18
|
-
requirements:
|
|
19
|
-
- - ~>
|
|
20
|
-
- !ruby/object:Gem::Version
|
|
21
|
-
version: '10.0'
|
|
22
|
-
type: :runtime
|
|
23
|
-
prerelease: false
|
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
none: false
|
|
26
|
-
requirements:
|
|
27
|
-
- - ~>
|
|
28
|
-
- !ruby/object:Gem::Version
|
|
29
|
-
version: '10.0'
|
|
30
13
|
- !ruby/object:Gem::Dependency
|
|
31
14
|
name: treetop
|
|
32
15
|
requirement: !ruby/object:Gem::Requirement
|
|
33
|
-
none: false
|
|
34
16
|
requirements:
|
|
35
17
|
- - ~>
|
|
36
18
|
- !ruby/object:Gem::Version
|
|
@@ -38,7 +20,6 @@ dependencies:
|
|
|
38
20
|
type: :runtime
|
|
39
21
|
prerelease: false
|
|
40
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
41
|
-
none: false
|
|
42
23
|
requirements:
|
|
43
24
|
- - ~>
|
|
44
25
|
- !ruby/object:Gem::Version
|
|
@@ -46,23 +27,20 @@ dependencies:
|
|
|
46
27
|
- !ruby/object:Gem::Dependency
|
|
47
28
|
name: parallel
|
|
48
29
|
requirement: !ruby/object:Gem::Requirement
|
|
49
|
-
none: false
|
|
50
30
|
requirements:
|
|
51
31
|
- - ~>
|
|
52
32
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0.
|
|
33
|
+
version: '0.7'
|
|
54
34
|
type: :runtime
|
|
55
35
|
prerelease: false
|
|
56
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
-
none: false
|
|
58
37
|
requirements:
|
|
59
38
|
- - ~>
|
|
60
39
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0.
|
|
40
|
+
version: '0.7'
|
|
62
41
|
- !ruby/object:Gem::Dependency
|
|
63
42
|
name: unicode_utils
|
|
64
43
|
requirement: !ruby/object:Gem::Requirement
|
|
65
|
-
none: false
|
|
66
44
|
requirements:
|
|
67
45
|
- - ~>
|
|
68
46
|
- !ruby/object:Gem::Version
|
|
@@ -70,114 +48,100 @@ dependencies:
|
|
|
70
48
|
type: :runtime
|
|
71
49
|
prerelease: false
|
|
72
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
-
none: false
|
|
74
51
|
requirements:
|
|
75
52
|
- - ~>
|
|
76
53
|
- !ruby/object:Gem::Version
|
|
77
54
|
version: '1.4'
|
|
78
55
|
- !ruby/object:Gem::Dependency
|
|
79
|
-
name:
|
|
56
|
+
name: bundler
|
|
80
57
|
requirement: !ruby/object:Gem::Requirement
|
|
81
|
-
none: false
|
|
82
58
|
requirements:
|
|
83
59
|
- - ~>
|
|
84
60
|
- !ruby/object:Gem::Version
|
|
85
|
-
version: '1.
|
|
61
|
+
version: '1.3'
|
|
86
62
|
type: :development
|
|
87
63
|
prerelease: false
|
|
88
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
89
|
-
none: false
|
|
90
65
|
requirements:
|
|
91
66
|
- - ~>
|
|
92
67
|
- !ruby/object:Gem::Version
|
|
93
|
-
version: '1.
|
|
68
|
+
version: '1.3'
|
|
94
69
|
- !ruby/object:Gem::Dependency
|
|
95
|
-
name:
|
|
70
|
+
name: rake
|
|
96
71
|
requirement: !ruby/object:Gem::Requirement
|
|
97
|
-
none: false
|
|
98
72
|
requirements:
|
|
99
73
|
- - ~>
|
|
100
74
|
- !ruby/object:Gem::Version
|
|
101
|
-
version: '1
|
|
75
|
+
version: '10.1'
|
|
102
76
|
type: :development
|
|
103
77
|
prerelease: false
|
|
104
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
105
|
-
none: false
|
|
106
79
|
requirements:
|
|
107
80
|
- - ~>
|
|
108
81
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '1
|
|
82
|
+
version: '10.1'
|
|
110
83
|
- !ruby/object:Gem::Dependency
|
|
111
|
-
name:
|
|
84
|
+
name: rspec
|
|
112
85
|
requirement: !ruby/object:Gem::Requirement
|
|
113
|
-
none: false
|
|
114
86
|
requirements:
|
|
115
|
-
- -
|
|
87
|
+
- - ~>
|
|
116
88
|
- !ruby/object:Gem::Version
|
|
117
|
-
version: '
|
|
118
|
-
type: :
|
|
89
|
+
version: '2.14'
|
|
90
|
+
type: :development
|
|
119
91
|
prerelease: false
|
|
120
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
-
none: false
|
|
122
93
|
requirements:
|
|
123
|
-
- -
|
|
94
|
+
- - ~>
|
|
124
95
|
- !ruby/object:Gem::Version
|
|
125
|
-
version: '
|
|
96
|
+
version: '2.14'
|
|
126
97
|
- !ruby/object:Gem::Dependency
|
|
127
|
-
name:
|
|
98
|
+
name: rr
|
|
128
99
|
requirement: !ruby/object:Gem::Requirement
|
|
129
|
-
none: false
|
|
130
100
|
requirements:
|
|
131
|
-
- -
|
|
101
|
+
- - ~>
|
|
132
102
|
- !ruby/object:Gem::Version
|
|
133
|
-
version: '
|
|
134
|
-
type: :
|
|
103
|
+
version: '1.1'
|
|
104
|
+
type: :development
|
|
135
105
|
prerelease: false
|
|
136
106
|
version_requirements: !ruby/object:Gem::Requirement
|
|
137
|
-
none: false
|
|
138
107
|
requirements:
|
|
139
|
-
- -
|
|
108
|
+
- - ~>
|
|
140
109
|
- !ruby/object:Gem::Version
|
|
141
|
-
version: '
|
|
110
|
+
version: '1.1'
|
|
142
111
|
- !ruby/object:Gem::Dependency
|
|
143
|
-
name:
|
|
112
|
+
name: debugger
|
|
144
113
|
requirement: !ruby/object:Gem::Requirement
|
|
145
|
-
none: false
|
|
146
114
|
requirements:
|
|
147
|
-
- -
|
|
115
|
+
- - ~>
|
|
148
116
|
- !ruby/object:Gem::Version
|
|
149
|
-
version: '
|
|
117
|
+
version: '1.6'
|
|
150
118
|
type: :development
|
|
151
119
|
prerelease: false
|
|
152
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
153
|
-
none: false
|
|
154
121
|
requirements:
|
|
155
|
-
- -
|
|
122
|
+
- - ~>
|
|
156
123
|
- !ruby/object:Gem::Version
|
|
157
|
-
version: '
|
|
124
|
+
version: '1.6'
|
|
158
125
|
description: Tools for biodiversity informatics
|
|
159
126
|
email: dmozzherin@gmail.com
|
|
160
127
|
executables:
|
|
161
128
|
- nnparse
|
|
162
129
|
- parserver
|
|
163
130
|
extensions: []
|
|
164
|
-
extra_rdoc_files:
|
|
165
|
-
- LICENSE
|
|
166
|
-
- README.md
|
|
131
|
+
extra_rdoc_files: []
|
|
167
132
|
files:
|
|
168
133
|
- .document
|
|
169
|
-
- .
|
|
134
|
+
- .gitignore
|
|
135
|
+
- .ruby-version
|
|
170
136
|
- .travis.yml
|
|
171
137
|
- CHANGELOG
|
|
172
138
|
- Gemfile
|
|
173
|
-
- Gemfile.lock
|
|
174
139
|
- LICENSE
|
|
175
140
|
- README.md
|
|
176
141
|
- Rakefile
|
|
177
|
-
- VERSION
|
|
178
142
|
- bin/nnparse
|
|
179
143
|
- bin/parserver
|
|
180
|
-
-
|
|
144
|
+
- biodiversity.gemspec
|
|
181
145
|
- examples/socket_client.rb
|
|
182
146
|
- lib/biodiversity.rb
|
|
183
147
|
- lib/biodiversity/guid.rb
|
|
@@ -186,42 +150,39 @@ files:
|
|
|
186
150
|
- lib/biodiversity/parser/scientific_name_canonical.treetop
|
|
187
151
|
- lib/biodiversity/parser/scientific_name_clean.treetop
|
|
188
152
|
- lib/biodiversity/parser/scientific_name_dirty.treetop
|
|
189
|
-
-
|
|
153
|
+
- lib/biodiversity/version.rb
|
|
154
|
+
- pkg/.gitkeep
|
|
190
155
|
- spec/biodiversity_spec.rb
|
|
191
156
|
- spec/guid/lsid.spec.rb
|
|
192
|
-
- spec/parser/
|
|
193
|
-
- spec/parser/
|
|
194
|
-
- spec/parser/
|
|
195
|
-
- spec/parser/
|
|
196
|
-
- spec/parser/spec_helper.rb
|
|
157
|
+
- spec/parser/scientific_name_canonical_spec.rb
|
|
158
|
+
- spec/parser/scientific_name_clean_spec.rb
|
|
159
|
+
- spec/parser/scientific_name_dirty_spec.rb
|
|
160
|
+
- spec/parser/scientific_name_spec.rb
|
|
197
161
|
- spec/parser/test_data.txt
|
|
198
162
|
- spec/parser/todo.txt
|
|
199
163
|
- spec/spec_helper.rb
|
|
200
|
-
homepage:
|
|
201
|
-
licenses:
|
|
164
|
+
homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
|
165
|
+
licenses:
|
|
166
|
+
- MIT
|
|
167
|
+
metadata: {}
|
|
202
168
|
post_install_message:
|
|
203
169
|
rdoc_options: []
|
|
204
170
|
require_paths:
|
|
205
171
|
- lib
|
|
206
172
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
207
|
-
none: false
|
|
208
173
|
requirements:
|
|
209
|
-
- -
|
|
174
|
+
- - '>='
|
|
210
175
|
- !ruby/object:Gem::Version
|
|
211
176
|
version: '0'
|
|
212
|
-
segments:
|
|
213
|
-
- 0
|
|
214
|
-
hash: 3336294247914629914
|
|
215
177
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
|
-
none: false
|
|
217
178
|
requirements:
|
|
218
|
-
- -
|
|
179
|
+
- - '>='
|
|
219
180
|
- !ruby/object:Gem::Version
|
|
220
181
|
version: '0'
|
|
221
182
|
requirements: []
|
|
222
183
|
rubyforge_project:
|
|
223
|
-
rubygems_version:
|
|
184
|
+
rubygems_version: 2.0.14
|
|
224
185
|
signing_key:
|
|
225
|
-
specification_version:
|
|
186
|
+
specification_version: 4
|
|
226
187
|
summary: Parser of scientific names
|
|
227
188
|
test_files: []
|
data/.rvmrc
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
rvm use ruby-1.9.3-p392@biodiversity --create
|
data/Gemfile.lock
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
GEM
|
|
2
|
-
remote: https://rubygems.org/
|
|
3
|
-
specs:
|
|
4
|
-
columnize (0.3.6)
|
|
5
|
-
debugger (1.5.0)
|
|
6
|
-
columnize (>= 0.3.1)
|
|
7
|
-
debugger-linecache (~> 1.2.0)
|
|
8
|
-
debugger-ruby_core_source (~> 1.2.0)
|
|
9
|
-
debugger-linecache (1.2.0)
|
|
10
|
-
debugger-ruby_core_source (1.2.0)
|
|
11
|
-
diff-lcs (1.2.4)
|
|
12
|
-
git (1.2.5)
|
|
13
|
-
jeweler (1.8.4)
|
|
14
|
-
bundler (~> 1.0)
|
|
15
|
-
git (>= 1.2.5)
|
|
16
|
-
rake
|
|
17
|
-
rdoc
|
|
18
|
-
json (1.7.7)
|
|
19
|
-
parallel (0.6.4)
|
|
20
|
-
polyglot (0.3.3)
|
|
21
|
-
rake (10.0.4)
|
|
22
|
-
rdoc (4.0.1)
|
|
23
|
-
json (~> 1.4)
|
|
24
|
-
rspec (2.13.0)
|
|
25
|
-
rspec-core (~> 2.13.0)
|
|
26
|
-
rspec-expectations (~> 2.13.0)
|
|
27
|
-
rspec-mocks (~> 2.13.0)
|
|
28
|
-
rspec-core (2.13.1)
|
|
29
|
-
rspec-expectations (2.13.0)
|
|
30
|
-
diff-lcs (>= 1.1.3, < 2.0)
|
|
31
|
-
rspec-mocks (2.13.1)
|
|
32
|
-
treetop (1.4.12)
|
|
33
|
-
polyglot
|
|
34
|
-
polyglot (>= 0.3.1)
|
|
35
|
-
unicode_utils (1.4.0)
|
|
36
|
-
|
|
37
|
-
PLATFORMS
|
|
38
|
-
ruby
|
|
39
|
-
|
|
40
|
-
DEPENDENCIES
|
|
41
|
-
debugger (~> 1.5)
|
|
42
|
-
jeweler (~> 1.8)
|
|
43
|
-
parallel (~> 0.6)
|
|
44
|
-
rake (~> 10.0)
|
|
45
|
-
rspec (~> 2.13)
|
|
46
|
-
treetop (~> 1.4)
|
|
47
|
-
unicode_utils (~> 1.4)
|
data/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
3.1.2
|
data/conf/environment.rb
DELETED
data/pkg/.gitignore
DELETED
|
File without changes
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# encoding: UTF-8
|
|
2
|
-
dir = File.dirname("__FILE__")
|
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
|
4
|
-
|
|
5
|
-
describe ScientificNameCanonical do
|
|
6
|
-
before(:all) do
|
|
7
|
-
set_parser(ScientificNameCanonicalParser.new)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it 'should parse names with valid name part and unparseable rest' do
|
|
11
|
-
[
|
|
12
|
-
['Morea ssjjlajajaj324$33 234243242','Morea', [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
|
|
13
|
-
['Morea (Morea) Burt 2342343242 23424322342 23424234', 'Morea (Morea)', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}}], {0=>["genus", 5], 7=>["infragenus", 12]}],
|
|
14
|
-
['Morea (Morea) burtius 2342343242 23424322342 23424234', 'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}, :species=>{:string=>"burtius"}}], {0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
|
|
15
|
-
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}], {0=>["genus", 6], 7=>["species", 17]} ],
|
|
16
|
-
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}], {0=>["genus", 11], 12=>["species", 20]}]
|
|
17
|
-
].each do |n|
|
|
18
|
-
parse(n[0]).should_not be_nil
|
|
19
|
-
value(n[0]).should == n[1]
|
|
20
|
-
details(n[0]).should == n[2]
|
|
21
|
-
pos(n[0]).should == n[3]
|
|
22
|
-
parse(n[0]).hybrid.should be_false
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
end
|
data/spec/parser/spec_helper.rb
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
dir = File.dirname("__FILE__")
|
|
2
|
-
require 'rubygems'
|
|
3
|
-
require 'yaml'
|
|
4
|
-
require 'treetop'
|
|
5
|
-
require 'json'
|
|
6
|
-
require File.expand_path(dir + '../../lib/biodiversity/parser')
|
|
7
|
-
|
|
8
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
|
|
9
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
|
|
10
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_canonical'))
|
|
11
|
-
|
|
12
|
-
PARSER_TEST_VERSION = "test_version"
|
|
13
|
-
|
|
14
|
-
def set_parser(parser)
|
|
15
|
-
@parser = parser
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def parse(input)
|
|
19
|
-
@parser.parse(input)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def value(input)
|
|
23
|
-
parse(input).value
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def canonical(input)
|
|
27
|
-
parse(input).canonical
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def details(input)
|
|
31
|
-
parse(input).details
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def pos(input)
|
|
35
|
-
parse(input).pos
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
def json(input)
|
|
39
|
-
parse(input).to_json.gsub(/"parser_version":"[^"]*"/, %Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
def debug(input)
|
|
43
|
-
res = parse(input)
|
|
44
|
-
puts "<pre>"
|
|
45
|
-
if res
|
|
46
|
-
puts 'success!'
|
|
47
|
-
puts res.inspect
|
|
48
|
-
else
|
|
49
|
-
puts input
|
|
50
|
-
val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s.gsub(/column /,'').to_i
|
|
51
|
-
print ("-" * (val - 1))
|
|
52
|
-
print "^ Computer says 'no'!\n"
|
|
53
|
-
puts @parser.failure_reason
|
|
54
|
-
puts @parser.to_yaml
|
|
55
|
-
end
|
|
56
|
-
puts "</pre>"
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def read_test_file
|
|
60
|
-
f = open(File.expand_path(File.dirname("__FILE__") + "../../spec/parser/test_data.txt"))
|
|
61
|
-
f.each do |line|
|
|
62
|
-
name, jsn = line.split("|")
|
|
63
|
-
if line.match(/^\s*#/) == nil && name && jsn
|
|
64
|
-
yield({:name => name, :jsn => jsn})
|
|
65
|
-
else
|
|
66
|
-
yield({:comment => line})
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|