biodiversity 3.1.2 → 3.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/.travis.yml +4 -3
- data/Gemfile +1 -13
- data/Rakefile +16 -34
- data/bin/nnparse +8 -19
- data/bin/parserver +1 -2
- data/biodiversity.gemspec +28 -0
- data/lib/biodiversity.rb +12 -6
- data/lib/biodiversity/guid.rb +1 -2
- data/lib/biodiversity/guid/lsid.rb +2 -4
- data/lib/biodiversity/parser.rb +5 -12
- data/lib/biodiversity/version.rb +3 -0
- data/spec/biodiversity_spec.rb +9 -0
- data/spec/guid/lsid.spec.rb +5 -7
- data/spec/parser/scientific_name_canonical_spec.rb +37 -0
- data/spec/parser/{scientific_name_clean.spec.rb → scientific_name_clean_spec.rb} +38 -10
- data/spec/parser/{scientific_name_dirty.spec.rb → scientific_name_dirty_spec.rb} +8 -3
- data/spec/parser/{scientific_name.spec.rb → scientific_name_spec.rb} +16 -17
- data/spec/spec_helper.rb +82 -0
- metadata +45 -84
- data/.rvmrc +0 -1
- data/Gemfile.lock +0 -47
- data/VERSION +0 -1
- data/conf/environment.rb +0 -3
- data/pkg/.gitignore +0 -0
- data/spec/parser/scientific_name_canonical.spec.rb +0 -26
- data/spec/parser/spec_helper.rb +0 -70
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c6dd51dda2a710e62e09554051bf9f26569e8f2c
|
4
|
+
data.tar.gz: 24d44fde0d6d582f485a45cfb0c809c22793aac3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5e5505a4a8dd3980eac1f126886bb6d59840c2380149d0690666311445a3875f0693dd23b431d96e96736859e40fe4758e8e542e7cf11131cb83f4e3c36400b5
|
7
|
+
data.tar.gz: f33859933760acd98e440df9b059473f651eaa918d5cb799f855d506793d1877456d9bbf4eb42dc949676a2342b79086190b6faf8831ad05f2e39abbee0ff23a
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0-p353
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,15 +1,3 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
|
4
|
-
gem 'treetop', '~> 1.4'
|
5
|
-
gem 'parallel', '~> 0.6'
|
6
|
-
gem 'unicode_utils', '~> 1.4'
|
7
|
-
|
8
|
-
group :development do
|
9
|
-
gem 'debugger', '~> 1.5'
|
10
|
-
gem 'jeweler', '~> 1.8'
|
11
|
-
end
|
12
|
-
|
13
|
-
group :test do
|
14
|
-
gem 'rspec', '~> 2.13'
|
15
|
-
end
|
3
|
+
gemspec
|
data/Rakefile
CHANGED
@@ -1,12 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
begin
|
5
|
+
Bundler.setup(:default, :development)
|
6
|
+
rescue Bundler::BundlerError => e
|
7
|
+
$stderr.puts e.message
|
8
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
9
|
+
exit e.status_code
|
10
|
+
end
|
11
|
+
|
5
12
|
require 'rspec/core'
|
6
13
|
require 'rspec/core/rake_task'
|
14
|
+
require 'rake/dsl_definition'
|
15
|
+
require 'rake'
|
16
|
+
require 'rspec'
|
17
|
+
require 'rspec/core/rake_task'
|
7
18
|
|
8
|
-
#Gem::manage_gems
|
9
|
-
#require 'rake/gempackagetask'
|
10
19
|
|
11
20
|
task :default => :spec
|
12
21
|
|
@@ -14,35 +23,8 @@ RSpec::Core::RakeTask.new do |t|
|
|
14
23
|
t.pattern = 'spec/**/*spec.rb'
|
15
24
|
end
|
16
25
|
|
17
|
-
ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
|
18
|
-
|
19
|
-
|
20
|
-
begin
|
21
|
-
require 'jeweler'
|
22
|
-
Jeweler::Tasks.new do |gem|
|
23
|
-
gem.name = 'biodiversity'
|
24
|
-
#To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
|
25
|
-
gem.summary = 'Parser of scientific names'
|
26
|
-
gem.description = 'Tools for biodiversity informatics'
|
27
|
-
gem.email = 'dmozzherin@gmail.com'
|
28
|
-
gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
|
29
|
-
gem.authors = ['Dmitry Mozzherin']
|
30
|
-
gem.has_rdoc = false
|
31
|
-
gem.bindir = 'bin'
|
32
|
-
gem.executables = ['nnparse', 'parserver']
|
33
|
-
gem.add_dependency('treetop')
|
34
|
-
gem.add_dependency('parallel')
|
35
|
-
# gem.add_dependency('json') if ruby_version < 19
|
36
|
-
gem.add_development_dependency "rspec"
|
37
|
-
# gem is a Gem::Specification...
|
38
|
-
# see http://www.rubygems.org/read/chapter/20 for additional settings
|
39
|
-
end
|
40
|
-
rescue LoadError
|
41
|
-
puts 'Jeweler (or a dependency) not available. ' +
|
42
|
-
'Install it with: sudo gem install jeweler'
|
43
|
-
end
|
44
|
-
|
45
26
|
task :tt do
|
27
|
+
dir = File.dirname(__FILE__)
|
46
28
|
['scientific_name_clean',
|
47
29
|
'scientific_name_dirty',
|
48
30
|
'scientific_name_canonical'].each do |f|
|
data/bin/nnparse
CHANGED
@@ -1,24 +1,20 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
|
-
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
4
|
-
if ruby_min_version < 19
|
5
|
-
require 'rubygems'
|
6
|
-
gem_name = 'biodiversity'
|
7
|
-
else
|
8
|
-
gem_name = 'biodiversity19'
|
9
|
-
end
|
10
|
-
gem gem_name rescue nil
|
11
3
|
|
12
|
-
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
13
4
|
require 'biodiversity'
|
14
5
|
require 'json'
|
15
6
|
|
16
7
|
def parser_error(name)
|
17
|
-
{'scientificName' =>
|
8
|
+
{ 'scientificName' =>
|
9
|
+
{ 'parsed' => false,
|
10
|
+
'verbatim' => name,
|
11
|
+
'error' => 'Parser error' } }.to_json
|
18
12
|
end
|
19
13
|
|
20
14
|
if ARGV.empty?
|
21
|
-
puts "Usage:\n\
|
15
|
+
puts "Usage:\n\n" +
|
16
|
+
"nnparse file_with_scientific_names [output_file]\n\n" +
|
17
|
+
"default output_file is parsed.json\n\n"
|
22
18
|
exit
|
23
19
|
end
|
24
20
|
|
@@ -30,21 +26,14 @@ p = ScientificNameParser.new
|
|
30
26
|
o = open(output, 'w')
|
31
27
|
count = 0
|
32
28
|
puts 'Parsing...'
|
33
|
-
f =
|
29
|
+
f = open(input, 'r:utf-8')
|
34
30
|
f.each do |line|
|
35
31
|
count += 1
|
36
32
|
puts("%s lines parsed" % count) if count % 10000 == 0
|
37
33
|
name = line.gsub(/^[\d]*\s*/, '').strip
|
38
34
|
begin
|
39
|
-
if ruby_min_version < 19
|
40
|
-
old_kcode = $KCODE
|
41
|
-
$KCODE = 'NONE'
|
42
|
-
end
|
43
35
|
p.parse(name)
|
44
36
|
parsed_data = p.parsed.all_json rescue parser_error(name)
|
45
|
-
if ruby_min_version < 19
|
46
|
-
$KCODE = old_kcode
|
47
|
-
end
|
48
37
|
rescue
|
49
38
|
parsed_data = parser_error(name)
|
50
39
|
end
|
data/bin/parserver
CHANGED
data/biodiversity.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'biodiversity/version'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
|
6
|
+
gem.name = 'biodiversity'
|
7
|
+
gem.version = Biodiversity::VERSION
|
8
|
+
gem.homepage = 'https://github.com/GlobalNamesArchitecture/biodiversity'
|
9
|
+
gem.license = 'MIT'
|
10
|
+
gem.summary = %Q{Parser of scientific names}
|
11
|
+
gem.description = %Q{Tools for biodiversity informatics}
|
12
|
+
gem.authors = ['Dmitry Mozzherin']
|
13
|
+
gem.email = 'dmozzherin@gmail.com'
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split("\n")
|
16
|
+
gem.executables = ['nnparse', 'parserver']
|
17
|
+
gem.require_paths = ['lib']
|
18
|
+
|
19
|
+
gem.add_runtime_dependency 'treetop', '~> 1.4'
|
20
|
+
gem.add_runtime_dependency 'parallel', '~> 0.7'
|
21
|
+
gem.add_runtime_dependency 'unicode_utils', '~> 1.4'
|
22
|
+
|
23
|
+
gem.add_development_dependency 'bundler', '~> 1.3'
|
24
|
+
gem.add_development_dependency 'rake', '~> 10.1'
|
25
|
+
gem.add_development_dependency 'rspec', '~> 2.14'
|
26
|
+
gem.add_development_dependency 'rr', '~> 1.1'
|
27
|
+
gem.add_development_dependency 'debugger', '~> 1.6'
|
28
|
+
end
|
data/lib/biodiversity.rb
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
-
require 'rubygems'
|
2
1
|
require 'treetop'
|
2
|
+
require 'json'
|
3
|
+
require 'open-uri'
|
4
|
+
require_relative 'biodiversity/version'
|
5
|
+
require_relative 'biodiversity/parser'
|
6
|
+
require_relative 'biodiversity/guid'
|
3
7
|
|
4
|
-
|
8
|
+
module Biodiversity
|
9
|
+
LSID_RESOLVER_URL = 'http://lsid.tdwg.org/'
|
10
|
+
|
11
|
+
def self.version
|
12
|
+
VERSION
|
13
|
+
end
|
14
|
+
end
|
5
15
|
|
6
|
-
BIODIVERSITY_ROOT = File.join(dir, 'biodiversity')
|
7
|
-
require File.join(dir, "/../conf/environment")
|
8
|
-
require File.join(BIODIVERSITY_ROOT, "parser")
|
9
|
-
require File.join(BIODIVERSITY_ROOT, "guid")
|
data/lib/biodiversity/guid.rb
CHANGED
@@ -1,2 +1 @@
|
|
1
|
-
|
2
|
-
require File.join(dir, *%w[guid lsid])
|
1
|
+
require_relative 'guid/lsid'
|
data/lib/biodiversity/guid/lsid.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
|
3
1
|
class LsidResolver
|
4
2
|
def self.resolve(lsid)
|
5
3
|
http_get_rdf(lsid)
|
@@ -8,11 +6,11 @@ class LsidResolver
|
|
8
6
|
protected
|
9
7
|
def self.http_get_rdf(lsid)
|
10
8
|
rdf = ''
|
11
|
-
open(LSID_RESOLVER_URL + lsid) do |f|
|
9
|
+
open(Biodiversity::LSID_RESOLVER_URL + lsid) do |f|
|
12
10
|
f.each do |line|
|
13
11
|
rdf += line if !line.strip.blank?
|
14
12
|
end
|
15
13
|
end
|
16
14
|
rdf
|
17
15
|
end
|
18
|
-
end
|
16
|
+
end
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require File.join(dir, *%w[parser scientific_name_canonical])
|
6
|
-
require 'rubygems'
|
7
|
-
require 'json'
|
2
|
+
require_relative 'parser/scientific_name_clean'
|
3
|
+
require_relative 'parser/scientific_name_dirty'
|
4
|
+
require_relative 'parser/scientific_name_canonical'
|
8
5
|
|
9
6
|
module PreProcessor
|
10
7
|
NOTES = /\s+(species\s+group|species\s+complex|group|author)\b.*$/i
|
@@ -108,10 +105,6 @@ end
|
|
108
105
|
# end
|
109
106
|
|
110
107
|
class ScientificNameParser
|
111
|
-
VERSION = open(File.join(File.dirname(__FILE__),
|
112
|
-
'..',
|
113
|
-
'..',
|
114
|
-
'VERSION')).readline.strip
|
115
108
|
|
116
109
|
FAILED_RESULT = ->(name) do
|
117
110
|
{ scientificName:
|
@@ -120,7 +113,7 @@ class ScientificNameParser
|
|
120
113
|
end
|
121
114
|
|
122
115
|
def self.version
|
123
|
-
VERSION
|
116
|
+
Biodiversity::VERSION
|
124
117
|
end
|
125
118
|
|
126
119
|
def self.fix_case(name_string)
|
@@ -213,7 +206,7 @@ class ScientificNameParser
|
|
213
206
|
def @parsed.all(opts = {})
|
214
207
|
canonical_with_rank = !!opts[:canonical_with_rank]
|
215
208
|
parsed = self.class != Hash
|
216
|
-
res = { parsed: parsed, parser_version: ScientificNameParser::
|
209
|
+
res = { parsed: parsed, parser_version: ScientificNameParser::version}
|
217
210
|
if parsed
|
218
211
|
hybrid = self.hybrid rescue false
|
219
212
|
res.merge!({
|
data/spec/biodiversity_spec.rb
CHANGED
data/spec/guid/lsid.spec.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
|
2
|
-
require 'rubygems'
|
3
|
-
require File.expand_path(dir + "../../conf/environment")
|
4
|
-
require File.expand_path(dir + "../../lib/biodiversity/guid")
|
1
|
+
require_relative '../spec_helper'
|
5
2
|
|
6
3
|
describe LsidResolver do
|
7
|
-
it
|
8
|
-
lsid =
|
9
|
-
LsidResolver.resolve(lsid)
|
4
|
+
it 'should return RFD document from lsid' do
|
5
|
+
lsid = 'urn:lsid:ubio.org:classificationbank:2232671'
|
6
|
+
stub(LsidResolver).resolve(lsid) {''}
|
7
|
+
LsidResolver.resolve(lsid).class.should == String
|
10
8
|
end
|
11
9
|
end
|
data/spec/parser/scientific_name_canonical_spec.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require_relative '../spec_helper'
|
3
|
+
|
4
|
+
describe ScientificNameCanonical do
|
5
|
+
before(:all) do
|
6
|
+
set_parser(ScientificNameCanonicalParser.new)
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'should parse names with valid name part and unparseable rest' do
|
10
|
+
[
|
11
|
+
['Morea ssjjlajajaj324$33 234243242','Morea',
|
12
|
+
[{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
|
13
|
+
['Morea (Morea) Burt 2342343242 23424322342 23424234',
|
14
|
+
'Morea (Morea)', [{:genus=>{:string=>"Morea"},
|
15
|
+
:infragenus=>{:string=>"Morea"}}],
|
16
|
+
{0=>["genus", 5], 7=>["infragenus", 12]}],
|
17
|
+
['Morea (Morea) burtius 2342343242 23424322342 23424234',
|
18
|
+
'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"},
|
19
|
+
:infragenus=>{:string=>"Morea"},
|
20
|
+
:species=>{:string=>"burtius"}}],
|
21
|
+
{0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
|
22
|
+
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',
|
23
|
+
[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}],
|
24
|
+
{0=>["genus", 6], 7=>["species", 17]} ],
|
25
|
+
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',
|
26
|
+
[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}],
|
27
|
+
{0=>["genus", 11], 12=>["species", 20]}]
|
28
|
+
].each do |n|
|
29
|
+
parse(n[0]).should_not be_nil
|
30
|
+
value(n[0]).should == n[1]
|
31
|
+
details(n[0]).should == n[2]
|
32
|
+
pos(n[0]).should == n[3]
|
33
|
+
parse(n[0]).hybrid.should be_false
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
data/spec/parser/{scientific_name_clean.spec.rb → scientific_name_clean_spec.rb}
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
2
|
+
require_relative '../spec_helper'
|
4
3
|
|
5
4
|
|
6
5
|
describe ScientificNameClean do
|
@@ -20,19 +19,36 @@ describe ScientificNameClean do
|
|
20
19
|
it 'should parse uninomial with author and year' do
|
21
20
|
sn = 'Pseudocercospora Speg.'
|
22
21
|
parse(sn).should_not be_nil
|
23
|
-
details(sn).should == [{:uninomial=>
|
22
|
+
details(sn).should == [{:uninomial=>
|
23
|
+
{:string=>"Pseudocercospora",
|
24
|
+
:authorship=>"Speg.",
|
25
|
+
:basionymAuthorTeam=>
|
26
|
+
{:authorTeam=>"Speg.", :author=>["Speg."]}}}]
|
24
27
|
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
25
28
|
sn = 'Pseudocercospora Spegazzini, 1910'
|
26
29
|
parse(sn).should_not be_nil
|
27
30
|
value(sn).should == 'Pseudocercospora Spegazzini 1910'
|
28
|
-
details(sn).should == [{:uninomial=>
|
29
|
-
|
31
|
+
details(sn).should == [{:uninomial=>
|
32
|
+
{:string=>"Pseudocercospora",
|
33
|
+
:authorship=>"Spegazzini, 1910",
|
34
|
+
:basionymAuthorTeam=>
|
35
|
+
{:authorTeam=>"Spegazzini",
|
36
|
+
:author=>["Spegazzini"], :year=>"1910"}}}]
|
37
|
+
pos(sn).should == {0=>["uninomial", 16],
|
38
|
+
17=>["author_word", 27], 29=>["year", 33]}
|
30
39
|
end
|
31
40
|
|
32
41
|
it "should parse uninomials with uninomial ranks" do
|
33
42
|
sn = "Epacridaceae trib. Archerieae Crayn & Quinn"
|
34
43
|
parse(sn).should_not be_nil
|
35
|
-
details(sn).should == [{:uninomial=>
|
44
|
+
details(sn).should == [{:uninomial=>
|
45
|
+
{:string=>"Epacridaceae"},
|
46
|
+
:rank_uninomials=>"trib.",
|
47
|
+
:uninomial2=>{:string=>"Archerieae",
|
48
|
+
:authorship=>"Crayn & Quinn",
|
49
|
+
:basionymAuthorTeam=>
|
50
|
+
{:authorTeam=>"Crayn & Quinn",
|
51
|
+
:author=>["Crayn", "Quinn"]}}}]
|
36
52
|
end
|
37
53
|
|
38
54
|
it 'should parse names with a valid 2 letter genus' do
|
@@ -65,7 +81,9 @@ describe ScientificNameClean do
|
|
65
81
|
parse(sn).should_not be_nil
|
66
82
|
value(sn).should == 'Pseudocercospora dendrobii'
|
67
83
|
canonical(sn).should == 'Pseudocercospora dendrobii'
|
68
|
-
details(sn).should == [{:genus=>
|
84
|
+
details(sn).should == [{:genus=>
|
85
|
+
{:string=>"Pseudocercospora"},
|
86
|
+
:species=>{:string=>"dendrobii"}}]
|
69
87
|
pos(sn).should == {0=>["genus", 16], 21=>["species", 30]}
|
70
88
|
end
|
71
89
|
|
@@ -76,7 +94,9 @@ describe ScientificNameClean do
|
|
76
94
|
sn = 'Ps. dendrobii'
|
77
95
|
parse(sn).should_not be_nil
|
78
96
|
value(sn).should == 'Ps. dendrobii'
|
79
|
-
details(sn).should == [{:genus=>
|
97
|
+
details(sn).should == [{:genus=>
|
98
|
+
{:string=>"Ps."},
|
99
|
+
:species=>{:string=>"dendrobii"}}]
|
80
100
|
end
|
81
101
|
|
82
102
|
|
@@ -87,8 +107,16 @@ describe ScientificNameClean do
|
|
87
107
|
sn = "Platypus bicaudatulus Schedl, 1935h"
|
88
108
|
parse(sn).should_not be_nil
|
89
109
|
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
90
|
-
details(sn).should == [{:genus=>
|
91
|
-
|
110
|
+
details(sn).should == [{:genus=>
|
111
|
+
{:string=>"Platypus"},
|
112
|
+
:species=>{:string=>"bicaudatulus",
|
113
|
+
:authorship=>"Schedl, 1935h",
|
114
|
+
:basionymAuthorTeam=>
|
115
|
+
{:authorTeam=>"Schedl", :author=>["Schedl"],
|
116
|
+
:year=>"1935"}}}]
|
117
|
+
pos(sn).should == {0=>["genus", 8],
|
118
|
+
9=>["species", 21], 22=>["author_word", 28],
|
119
|
+
30=>["year", 35]}
|
92
120
|
parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
|
93
121
|
sn = "Platypus bicaudatulus Schedl (1935h)"
|
94
122
|
parse(sn).should_not be_nil
|
data/spec/parser/{scientific_name_dirty.spec.rb → scientific_name_dirty_spec.rb}
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
2
|
+
require_relative '../spec_helper'
|
4
3
|
|
5
4
|
describe ScientificNameDirty do
|
6
5
|
before(:all) do
|
@@ -15,7 +14,13 @@ describe ScientificNameDirty do
|
|
15
14
|
sn = "Eichornia crassipes ( (Martius) ) Solms-Laub."
|
16
15
|
parse(sn).should_not be_nil
|
17
16
|
value(sn).should == "Eichornia crassipes (Martius) Solms-Laub."
|
18
|
-
details(sn).should == [{:genus=>{:string=>"Eichornia"},
|
17
|
+
details(sn).should == [{:genus=>{:string=>"Eichornia"},
|
18
|
+
:species=>{:string=>"crassipes",
|
19
|
+
:authorship=>"( (Martius) ) Solms-Laub.",
|
20
|
+
:combinationAuthorTeam=>{:authorTeam=>"Solms-Laub.",
|
21
|
+
:author=>["Solms-Laub."]},
|
22
|
+
:basionymAuthorTeam=>{:authorTeam=>"Martius",
|
23
|
+
:author=>["Martius"]}}}]
|
19
24
|
pos(sn).should == {0=>["genus", 9], 10=>["species", 19], 23=>["author_word", 30], 34=>["author_word", 45]}
|
20
25
|
end
|
21
26
|
|
data/spec/parser/{scientific_name.spec.rb → scientific_name_spec.rb}
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
|
2
3
|
#NOTE: this spec needs compiled treetop files.
|
3
|
-
|
4
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
5
|
-
require File.expand_path(dir + '../../lib/biodiversity/parser')
|
4
|
+
require_relative '../spec_helper'
|
6
5
|
|
7
6
|
describe ScientificNameParser do
|
8
7
|
before(:all) do
|
@@ -15,10 +14,10 @@ describe ScientificNameParser do
|
|
15
14
|
|
16
15
|
it 'should ScientificNameParser::fix_case' do
|
17
16
|
names = [
|
18
|
-
[
|
19
|
-
[
|
20
|
-
[
|
21
|
-
[
|
17
|
+
['QUERCUS ALBA', 'Quercus alba'],
|
18
|
+
['QUERCUS (QUERCUS) ALBA', 'Quercus (Quercus) alba'],
|
19
|
+
['QÜERCUS', 'Qüercus'],
|
20
|
+
['PARDOSA MOéSTA', 'Pardosa moésta'],
|
22
21
|
]
|
23
22
|
names.each do |name, capitalization|
|
24
23
|
ScientificNameParser::fix_case(name).should == capitalization
|
@@ -34,7 +33,7 @@ describe ScientificNameParser do
|
|
34
33
|
|
35
34
|
# it 'should generate new test_file' do
|
36
35
|
# new_test = open(File.expand_path(dir +
|
37
|
-
#
|
36
|
+
# '../../spec/parser/test_data_new.txt'),'w')
|
38
37
|
# read_test_file do |y|
|
39
38
|
# if y[:comment]
|
40
39
|
# new_test.write y[:comment]
|
@@ -52,20 +51,20 @@ describe ScientificNameParser do
|
|
52
51
|
'"parser_version":"test_version","verbatim":"ddd sljlkj 3223452432"}}'
|
53
52
|
end
|
54
53
|
|
55
|
-
it
|
54
|
+
it 'should show version when the flag :show_version set to true' do
|
56
55
|
parse('Homo sapiens')[:scientificName][:parser_version].should_not be_nil
|
57
56
|
end
|
58
57
|
|
59
|
-
it
|
58
|
+
it 'should show version for not spelled names' do
|
60
59
|
parse('not_a_name')[:scientificName][:parser_version].should_not be_nil
|
61
60
|
end
|
62
61
|
|
63
|
-
it
|
62
|
+
it 'should generate version for viruses' do
|
64
63
|
parse('Nile virus')[:scientificName][:parser_version].should_not be_nil
|
65
64
|
end
|
66
65
|
end
|
67
66
|
|
68
|
-
describe
|
67
|
+
describe 'ScientificNameParser with ranked canonicals' do
|
69
68
|
before(:all) do
|
70
69
|
@parser = ScientificNameParser.new(canonical_with_rank: true)
|
71
70
|
end
|
@@ -101,12 +100,12 @@ describe "ScientificNameParser with ranked canonicals" do
|
|
101
100
|
end
|
102
101
|
|
103
102
|
describe ParallelParser do
|
104
|
-
it
|
103
|
+
it 'should find number of cpus' do
|
105
104
|
pparser = ParallelParser.new
|
106
105
|
pparser.cpu_num.should > 0
|
107
106
|
end
|
108
107
|
|
109
|
-
it
|
108
|
+
it 'should parse several names in parallel' do
|
110
109
|
names = []
|
111
110
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
112
111
|
names.uniq!
|
@@ -116,7 +115,7 @@ describe ParallelParser do
|
|
116
115
|
res.keys.size.should == names.size
|
117
116
|
end
|
118
117
|
|
119
|
-
it
|
118
|
+
it 'should parse several names in parallel with given num of processes' do
|
120
119
|
names = []
|
121
120
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
122
121
|
names.uniq!
|
@@ -126,8 +125,8 @@ describe ParallelParser do
|
|
126
125
|
res.keys.size.should == names.size
|
127
126
|
end
|
128
127
|
|
129
|
-
it
|
130
|
-
a hash with name as a key and parsed data as value
|
128
|
+
it 'should have parsed name in native ruby format and in returned as \
|
129
|
+
a hash with name as a key and parsed data as value' do
|
131
130
|
names = []
|
132
131
|
read_test_file { |n| names << (n[:name]) if n[:name] }
|
133
132
|
names.uniq!
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'treetop'
|
3
|
+
require 'json'
|
4
|
+
require 'biodiversity'
|
5
|
+
|
6
|
+
|
7
|
+
RSpec.configure do |c|
|
8
|
+
c.mock_with :rr
|
9
|
+
end
|
10
|
+
|
11
|
+
dir = File.dirname(__FILE__)
|
12
|
+
Treetop.load(File.expand_path(File.join(dir,
|
13
|
+
'../lib/biodiversity/parser/scientific_name_clean')))
|
14
|
+
Treetop.load(File.expand_path(File.join(dir,
|
15
|
+
'../lib/biodiversity/parser/scientific_name_dirty')))
|
16
|
+
Treetop.load(File.expand_path(File.join(dir,
|
17
|
+
'../lib/biodiversity/parser/scientific_name_canonical')))
|
18
|
+
|
19
|
+
PARSER_TEST_VERSION = 'test_version'
|
20
|
+
|
21
|
+
def set_parser(parser)
|
22
|
+
@parser = parser
|
23
|
+
end
|
24
|
+
|
25
|
+
def parse(input)
|
26
|
+
@parser.parse(input)
|
27
|
+
end
|
28
|
+
|
29
|
+
def value(input)
|
30
|
+
parse(input).value
|
31
|
+
end
|
32
|
+
|
33
|
+
def canonical(input)
|
34
|
+
parse(input).canonical
|
35
|
+
end
|
36
|
+
|
37
|
+
def details(input)
|
38
|
+
parse(input).details
|
39
|
+
end
|
40
|
+
|
41
|
+
def pos(input)
|
42
|
+
parse(input).pos
|
43
|
+
end
|
44
|
+
|
45
|
+
def json(input)
|
46
|
+
parse(input).
|
47
|
+
to_json.gsub(/"parser_version":"[^"]*"/,
|
48
|
+
%Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
49
|
+
end
|
50
|
+
|
51
|
+
def debug(input)
|
52
|
+
res = parse(input)
|
53
|
+
puts '<pre>'
|
54
|
+
if res
|
55
|
+
puts 'success!'
|
56
|
+
puts res.inspect
|
57
|
+
else
|
58
|
+
puts input
|
59
|
+
val = @parser.failure_reason.to_s.match(/column [0-9]*/).
|
60
|
+
to_s.gsub(/column /,'').to_i
|
61
|
+
print ('-' * (val - 1))
|
62
|
+
print "^ Computer says 'no'!\n"
|
63
|
+
puts @parser.failure_reason
|
64
|
+
puts @parser.to_yaml
|
65
|
+
end
|
66
|
+
puts '</pre>'
|
67
|
+
end
|
68
|
+
|
69
|
+
def read_test_file
|
70
|
+
f = open(File.expand_path(File.join(File.dirname(__FILE__),
|
71
|
+
'parser/test_data.txt')))
|
72
|
+
f.each do |line|
|
73
|
+
name, jsn = line.split("|")
|
74
|
+
if line.match(/^\s*#/) == nil && name && jsn
|
75
|
+
yield({:name => name, :jsn => jsn})
|
76
|
+
else
|
77
|
+
yield({:comment => line})
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
|
metadata
CHANGED
@@ -1,36 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
5
|
-
prerelease:
|
4
|
+
version: 3.1.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Dmitry Mozzherin
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-12-18 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: rake
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '10.0'
|
22
|
-
type: :runtime
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '10.0'
|
30
13
|
- !ruby/object:Gem::Dependency
|
31
14
|
name: treetop
|
32
15
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
16
|
requirements:
|
35
17
|
- - ~>
|
36
18
|
- !ruby/object:Gem::Version
|
@@ -38,7 +20,6 @@ dependencies:
|
|
38
20
|
type: :runtime
|
39
21
|
prerelease: false
|
40
22
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
23
|
requirements:
|
43
24
|
- - ~>
|
44
25
|
- !ruby/object:Gem::Version
|
@@ -46,23 +27,20 @@ dependencies:
|
|
46
27
|
- !ruby/object:Gem::Dependency
|
47
28
|
name: parallel
|
48
29
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
30
|
requirements:
|
51
31
|
- - ~>
|
52
32
|
- !ruby/object:Gem::Version
|
53
|
-
version: '0.
|
33
|
+
version: '0.7'
|
54
34
|
type: :runtime
|
55
35
|
prerelease: false
|
56
36
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
37
|
requirements:
|
59
38
|
- - ~>
|
60
39
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
40
|
+
version: '0.7'
|
62
41
|
- !ruby/object:Gem::Dependency
|
63
42
|
name: unicode_utils
|
64
43
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
44
|
requirements:
|
67
45
|
- - ~>
|
68
46
|
- !ruby/object:Gem::Version
|
@@ -70,114 +48,100 @@ dependencies:
|
|
70
48
|
type: :runtime
|
71
49
|
prerelease: false
|
72
50
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
51
|
requirements:
|
75
52
|
- - ~>
|
76
53
|
- !ruby/object:Gem::Version
|
77
54
|
version: '1.4'
|
78
55
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
56
|
+
name: bundler
|
80
57
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
58
|
requirements:
|
83
59
|
- - ~>
|
84
60
|
- !ruby/object:Gem::Version
|
85
|
-
version: '1.
|
61
|
+
version: '1.3'
|
86
62
|
type: :development
|
87
63
|
prerelease: false
|
88
64
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
65
|
requirements:
|
91
66
|
- - ~>
|
92
67
|
- !ruby/object:Gem::Version
|
93
|
-
version: '1.
|
68
|
+
version: '1.3'
|
94
69
|
- !ruby/object:Gem::Dependency
|
95
|
-
name:
|
70
|
+
name: rake
|
96
71
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
72
|
requirements:
|
99
73
|
- - ~>
|
100
74
|
- !ruby/object:Gem::Version
|
101
|
-
version: '1
|
75
|
+
version: '10.1'
|
102
76
|
type: :development
|
103
77
|
prerelease: false
|
104
78
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
79
|
requirements:
|
107
80
|
- - ~>
|
108
81
|
- !ruby/object:Gem::Version
|
109
|
-
version: '1
|
82
|
+
version: '10.1'
|
110
83
|
- !ruby/object:Gem::Dependency
|
111
|
-
name:
|
84
|
+
name: rspec
|
112
85
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
86
|
requirements:
|
115
|
-
- -
|
87
|
+
- - ~>
|
116
88
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
118
|
-
type: :
|
89
|
+
version: '2.14'
|
90
|
+
type: :development
|
119
91
|
prerelease: false
|
120
92
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
93
|
requirements:
|
123
|
-
- -
|
94
|
+
- - ~>
|
124
95
|
- !ruby/object:Gem::Version
|
125
|
-
version: '
|
96
|
+
version: '2.14'
|
126
97
|
- !ruby/object:Gem::Dependency
|
127
|
-
name:
|
98
|
+
name: rr
|
128
99
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
100
|
requirements:
|
131
|
-
- -
|
101
|
+
- - ~>
|
132
102
|
- !ruby/object:Gem::Version
|
133
|
-
version: '
|
134
|
-
type: :
|
103
|
+
version: '1.1'
|
104
|
+
type: :development
|
135
105
|
prerelease: false
|
136
106
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
107
|
requirements:
|
139
|
-
- -
|
108
|
+
- - ~>
|
140
109
|
- !ruby/object:Gem::Version
|
141
|
-
version: '
|
110
|
+
version: '1.1'
|
142
111
|
- !ruby/object:Gem::Dependency
|
143
|
-
name:
|
112
|
+
name: debugger
|
144
113
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
114
|
requirements:
|
147
|
-
- -
|
115
|
+
- - ~>
|
148
116
|
- !ruby/object:Gem::Version
|
149
|
-
version: '
|
117
|
+
version: '1.6'
|
150
118
|
type: :development
|
151
119
|
prerelease: false
|
152
120
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
121
|
requirements:
|
155
|
-
- -
|
122
|
+
- - ~>
|
156
123
|
- !ruby/object:Gem::Version
|
157
|
-
version: '
|
124
|
+
version: '1.6'
|
158
125
|
description: Tools for biodiversity informatics
|
159
126
|
email: dmozzherin@gmail.com
|
160
127
|
executables:
|
161
128
|
- nnparse
|
162
129
|
- parserver
|
163
130
|
extensions: []
|
164
|
-
extra_rdoc_files:
|
165
|
-
- LICENSE
|
166
|
-
- README.md
|
131
|
+
extra_rdoc_files: []
|
167
132
|
files:
|
168
133
|
- .document
|
169
|
-
- .
|
134
|
+
- .gitignore
|
135
|
+
- .ruby-version
|
170
136
|
- .travis.yml
|
171
137
|
- CHANGELOG
|
172
138
|
- Gemfile
|
173
|
-
- Gemfile.lock
|
174
139
|
- LICENSE
|
175
140
|
- README.md
|
176
141
|
- Rakefile
|
177
|
-
- VERSION
|
178
142
|
- bin/nnparse
|
179
143
|
- bin/parserver
|
180
|
-
-
|
144
|
+
- biodiversity.gemspec
|
181
145
|
- examples/socket_client.rb
|
182
146
|
- lib/biodiversity.rb
|
183
147
|
- lib/biodiversity/guid.rb
|
@@ -186,42 +150,39 @@ files:
|
|
186
150
|
- lib/biodiversity/parser/scientific_name_canonical.treetop
|
187
151
|
- lib/biodiversity/parser/scientific_name_clean.treetop
|
188
152
|
- lib/biodiversity/parser/scientific_name_dirty.treetop
|
189
|
-
-
|
153
|
+
- lib/biodiversity/version.rb
|
154
|
+
- pkg/.gitkeep
|
190
155
|
- spec/biodiversity_spec.rb
|
191
156
|
- spec/guid/lsid.spec.rb
|
192
|
-
- spec/parser/
|
193
|
-
- spec/parser/
|
194
|
-
- spec/parser/
|
195
|
-
- spec/parser/
|
196
|
-
- spec/parser/spec_helper.rb
|
157
|
+
- spec/parser/scientific_name_canonical_spec.rb
|
158
|
+
- spec/parser/scientific_name_clean_spec.rb
|
159
|
+
- spec/parser/scientific_name_dirty_spec.rb
|
160
|
+
- spec/parser/scientific_name_spec.rb
|
197
161
|
- spec/parser/test_data.txt
|
198
162
|
- spec/parser/todo.txt
|
199
163
|
- spec/spec_helper.rb
|
200
|
-
homepage:
|
201
|
-
licenses:
|
164
|
+
homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
165
|
+
licenses:
|
166
|
+
- MIT
|
167
|
+
metadata: {}
|
202
168
|
post_install_message:
|
203
169
|
rdoc_options: []
|
204
170
|
require_paths:
|
205
171
|
- lib
|
206
172
|
required_ruby_version: !ruby/object:Gem::Requirement
|
207
|
-
none: false
|
208
173
|
requirements:
|
209
|
-
- -
|
174
|
+
- - '>='
|
210
175
|
- !ruby/object:Gem::Version
|
211
176
|
version: '0'
|
212
|
-
segments:
|
213
|
-
- 0
|
214
|
-
hash: 3336294247914629914
|
215
177
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
216
|
-
none: false
|
217
178
|
requirements:
|
218
|
-
- -
|
179
|
+
- - '>='
|
219
180
|
- !ruby/object:Gem::Version
|
220
181
|
version: '0'
|
221
182
|
requirements: []
|
222
183
|
rubyforge_project:
|
223
|
-
rubygems_version:
|
184
|
+
rubygems_version: 2.0.14
|
224
185
|
signing_key:
|
225
|
-
specification_version:
|
186
|
+
specification_version: 4
|
226
187
|
summary: Parser of scientific names
|
227
188
|
test_files: []
|
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm use ruby-1.9.3-p392@biodiversity --create
|
data/Gemfile.lock
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: https://rubygems.org/
|
3
|
-
specs:
|
4
|
-
columnize (0.3.6)
|
5
|
-
debugger (1.5.0)
|
6
|
-
columnize (>= 0.3.1)
|
7
|
-
debugger-linecache (~> 1.2.0)
|
8
|
-
debugger-ruby_core_source (~> 1.2.0)
|
9
|
-
debugger-linecache (1.2.0)
|
10
|
-
debugger-ruby_core_source (1.2.0)
|
11
|
-
diff-lcs (1.2.4)
|
12
|
-
git (1.2.5)
|
13
|
-
jeweler (1.8.4)
|
14
|
-
bundler (~> 1.0)
|
15
|
-
git (>= 1.2.5)
|
16
|
-
rake
|
17
|
-
rdoc
|
18
|
-
json (1.7.7)
|
19
|
-
parallel (0.6.4)
|
20
|
-
polyglot (0.3.3)
|
21
|
-
rake (10.0.4)
|
22
|
-
rdoc (4.0.1)
|
23
|
-
json (~> 1.4)
|
24
|
-
rspec (2.13.0)
|
25
|
-
rspec-core (~> 2.13.0)
|
26
|
-
rspec-expectations (~> 2.13.0)
|
27
|
-
rspec-mocks (~> 2.13.0)
|
28
|
-
rspec-core (2.13.1)
|
29
|
-
rspec-expectations (2.13.0)
|
30
|
-
diff-lcs (>= 1.1.3, < 2.0)
|
31
|
-
rspec-mocks (2.13.1)
|
32
|
-
treetop (1.4.12)
|
33
|
-
polyglot
|
34
|
-
polyglot (>= 0.3.1)
|
35
|
-
unicode_utils (1.4.0)
|
36
|
-
|
37
|
-
PLATFORMS
|
38
|
-
ruby
|
39
|
-
|
40
|
-
DEPENDENCIES
|
41
|
-
debugger (~> 1.5)
|
42
|
-
jeweler (~> 1.8)
|
43
|
-
parallel (~> 0.6)
|
44
|
-
rake (~> 10.0)
|
45
|
-
rspec (~> 2.13)
|
46
|
-
treetop (~> 1.4)
|
47
|
-
unicode_utils (~> 1.4)
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
3.1.2
|
data/conf/environment.rb
DELETED
data/pkg/.gitignore
DELETED
File without changes
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
dir = File.dirname("__FILE__")
|
3
|
-
require File.expand_path(dir + '../../spec/parser/spec_helper')
|
4
|
-
|
5
|
-
describe ScientificNameCanonical do
|
6
|
-
before(:all) do
|
7
|
-
set_parser(ScientificNameCanonicalParser.new)
|
8
|
-
end
|
9
|
-
|
10
|
-
it 'should parse names with valid name part and unparseable rest' do
|
11
|
-
[
|
12
|
-
['Morea ssjjlajajaj324$33 234243242','Morea', [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
|
13
|
-
['Morea (Morea) Burt 2342343242 23424322342 23424234', 'Morea (Morea)', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}}], {0=>["genus", 5], 7=>["infragenus", 12]}],
|
14
|
-
['Morea (Morea) burtius 2342343242 23424322342 23424234', 'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}, :species=>{:string=>"burtius"}}], {0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
|
15
|
-
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}], {0=>["genus", 6], 7=>["species", 17]} ],
|
16
|
-
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}], {0=>["genus", 11], 12=>["species", 20]}]
|
17
|
-
].each do |n|
|
18
|
-
parse(n[0]).should_not be_nil
|
19
|
-
value(n[0]).should == n[1]
|
20
|
-
details(n[0]).should == n[2]
|
21
|
-
pos(n[0]).should == n[3]
|
22
|
-
parse(n[0]).hybrid.should be_false
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
data/spec/parser/spec_helper.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
dir = File.dirname("__FILE__")
|
2
|
-
require 'rubygems'
|
3
|
-
require 'yaml'
|
4
|
-
require 'treetop'
|
5
|
-
require 'json'
|
6
|
-
require File.expand_path(dir + '../../lib/biodiversity/parser')
|
7
|
-
|
8
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
|
9
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
|
10
|
-
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_canonical'))
|
11
|
-
|
12
|
-
PARSER_TEST_VERSION = "test_version"
|
13
|
-
|
14
|
-
def set_parser(parser)
|
15
|
-
@parser = parser
|
16
|
-
end
|
17
|
-
|
18
|
-
def parse(input)
|
19
|
-
@parser.parse(input)
|
20
|
-
end
|
21
|
-
|
22
|
-
def value(input)
|
23
|
-
parse(input).value
|
24
|
-
end
|
25
|
-
|
26
|
-
def canonical(input)
|
27
|
-
parse(input).canonical
|
28
|
-
end
|
29
|
-
|
30
|
-
def details(input)
|
31
|
-
parse(input).details
|
32
|
-
end
|
33
|
-
|
34
|
-
def pos(input)
|
35
|
-
parse(input).pos
|
36
|
-
end
|
37
|
-
|
38
|
-
def json(input)
|
39
|
-
parse(input).to_json.gsub(/"parser_version":"[^"]*"/, %Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
40
|
-
end
|
41
|
-
|
42
|
-
def debug(input)
|
43
|
-
res = parse(input)
|
44
|
-
puts "<pre>"
|
45
|
-
if res
|
46
|
-
puts 'success!'
|
47
|
-
puts res.inspect
|
48
|
-
else
|
49
|
-
puts input
|
50
|
-
val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s.gsub(/column /,'').to_i
|
51
|
-
print ("-" * (val - 1))
|
52
|
-
print "^ Computer says 'no'!\n"
|
53
|
-
puts @parser.failure_reason
|
54
|
-
puts @parser.to_yaml
|
55
|
-
end
|
56
|
-
puts "</pre>"
|
57
|
-
end
|
58
|
-
|
59
|
-
def read_test_file
|
60
|
-
f = open(File.expand_path(File.dirname("__FILE__") + "../../spec/parser/test_data.txt"))
|
61
|
-
f.each do |line|
|
62
|
-
name, jsn = line.split("|")
|
63
|
-
if line.match(/^\s*#/) == nil && name && jsn
|
64
|
-
yield({:name => name, :jsn => jsn})
|
65
|
-
else
|
66
|
-
yield({:comment => line})
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|