biodiversity19 3.1.2 → 3.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d9c4aa2e950ccec40b8af98e386b9af944be8617
4
+ data.tar.gz: 8e0848a483fc7e099bcea8baf9a1babb7f4e34c8
5
+ SHA512:
6
+ metadata.gz: 3f2108edd351d1581649033e075a1296fa709a8f9f9bd781e54c1c47c04d164039388a7c31b6a835f2be479a76cba4d36cd195d12395fcd7bc744702152ff08a
7
+ data.tar.gz: 841da1d2e47ae88b90764c460a310a4c62c2bebf055ea888c903754a14541813d2eab4a9e068c96c0c4c3a535e8084970706be0b418b6822e8bd58efc9ee05b4
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ lib/biodiversity/parser/*rb
2
+ *.sw?
3
+ .DS_Store
4
+ coverage
5
+ rdoc
6
+ pkg
7
+ *.swp
8
+ *.swo
9
+ biodiversity*.gem
10
+ *json
11
+ *xml
12
+ tmp
13
+ .DS_Store
14
+ spec/parser/test_data_new.txt
15
+ t
16
+ bin
17
+ .bundle
18
+ bundle_bin
19
+ Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p353
data/.travis.yml CHANGED
@@ -1,7 +1,8 @@
1
1
  rvm:
2
- - 1.9.3
3
- - 2.0.0
4
- bundler_args: --without development
2
+ - 1.9.3-p448
3
+ - 2.0.0-p353
5
4
  branches:
6
5
  only:
7
6
  - master
7
+ before_script:
8
+ - bundle exec rake tt
data/Gemfile CHANGED
@@ -1,15 +1,3 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gem 'rake', '~> 10.0'
4
- gem 'treetop', '~> 1.4'
5
- gem 'parallel', '~> 0.6'
6
- gem 'unicode_utils', '~> 1.4'
7
-
8
- group :development do
9
- gem 'debugger', '~> 1.5'
10
- gem 'jeweler', '~> 1.8'
11
- end
12
-
13
- group :test do
14
- gem 'rspec', '~> 2.13'
15
- end
3
+ gemspec
data/Rakefile CHANGED
@@ -1,12 +1,21 @@
1
- dir = File.dirname(__FILE__)
2
- require 'rubygems'
3
- require 'rake'
4
- #$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ begin
5
+ Bundler.setup(:default, :development)
6
+ rescue Bundler::BundlerError => e
7
+ $stderr.puts e.message
8
+ $stderr.puts 'Run `bundle install` to install missing gems'
9
+ exit e.status_code
10
+ end
11
+
5
12
  require 'rspec/core'
6
13
  require 'rspec/core/rake_task'
14
+ require 'rake/dsl_definition'
15
+ require 'rake'
16
+ require 'rspec'
17
+ require 'rspec/core/rake_task'
7
18
 
8
- #Gem::manage_gems
9
- #require 'rake/gempackagetask'
10
19
 
11
20
  task :default => :spec
12
21
 
@@ -14,35 +23,8 @@ RSpec::Core::RakeTask.new do |t|
14
23
  t.pattern = 'spec/**/*spec.rb'
15
24
  end
16
25
 
17
- ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
18
-
19
-
20
- begin
21
- require 'jeweler'
22
- Jeweler::Tasks.new do |gem|
23
- gem.name = 'biodiversity19'
24
- #To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
25
- gem.summary = 'Parser of scientific names'
26
- gem.description = 'Tools for biodiversity informatics'
27
- gem.email = 'dmozzherin@gmail.com'
28
- gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
29
- gem.authors = ['Dmitry Mozzherin']
30
- gem.has_rdoc = false
31
- gem.bindir = 'bin'
32
- gem.executables = ['nnparse', 'parserver']
33
- gem.add_dependency('treetop')
34
- gem.add_dependency('parallel')
35
- # gem.add_dependency('json') if ruby_version < 19
36
- gem.add_development_dependency "rspec"
37
- # gem is a Gem::Specification...
38
- # see http://www.rubygems.org/read/chapter/20 for additional settings
39
- end
40
- rescue LoadError
41
- puts 'Jeweler (or a dependency) not available. ' +
42
- 'Install it with: sudo gem install jeweler'
43
- end
44
-
45
26
  task :tt do
27
+ dir = File.dirname(__FILE__)
46
28
  ['scientific_name_clean',
47
29
  'scientific_name_dirty',
48
30
  'scientific_name_canonical'].each do |f|
data/bin/nnparse CHANGED
@@ -1,24 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
- ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
4
- if ruby_min_version < 19
5
- require 'rubygems'
6
- gem_name = 'biodiversity'
7
- else
8
- gem_name = 'biodiversity19'
9
- end
10
- gem gem_name rescue nil
11
3
 
12
- $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
13
4
  require 'biodiversity'
14
5
  require 'json'
15
6
 
16
7
  def parser_error(name)
17
- {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
8
+ { 'scientificName' =>
9
+ { 'parsed' => false,
10
+ 'verbatim' => name,
11
+ 'error' => 'Parser error' } }.to_json
18
12
  end
19
13
 
20
14
  if ARGV.empty?
21
- puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
15
+ puts "Usage:\n\n" +
16
+ "nnparse file_with_scientific_names [output_file]\n\n" +
17
+ "default output_file is parsed.json\n\n"
22
18
  exit
23
19
  end
24
20
 
@@ -30,21 +26,14 @@ p = ScientificNameParser.new
30
26
  o = open(output, 'w')
31
27
  count = 0
32
28
  puts 'Parsing...'
33
- f = ruby_min_version < 19 ? open(input) : open(input, 'r:utf-8')
29
+ f = open(input, 'r:utf-8')
34
30
  f.each do |line|
35
31
  count += 1
36
32
  puts("%s lines parsed" % count) if count % 10000 == 0
37
33
  name = line.gsub(/^[\d]*\s*/, '').strip
38
34
  begin
39
- if ruby_min_version < 19
40
- old_kcode = $KCODE
41
- $KCODE = 'NONE'
42
- end
43
35
  p.parse(name)
44
36
  parsed_data = p.parsed.all_json rescue parser_error(name)
45
- if ruby_min_version < 19
46
- $KCODE = old_kcode
47
- end
48
37
  rescue
49
38
  parsed_data = parser_error(name)
50
39
  end
data/bin/parserver CHANGED
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
- require 'rubygems'
3
2
  require 'optparse'
4
3
  require 'socket'
5
- require 'biodiversity' # Get sockets from stdlib
4
+ require 'biodiversity'
6
5
 
7
6
  DEFAULT_PORT = 4334
8
7
  RUBY_VERSION_INT = RUBY_VERSION.split('.')[0..1].join('').to_i
@@ -0,0 +1,28 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+
3
+ require 'biodiversity/version'
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.name = 'biodiversity19'
7
+ gem.version = Biodiversity::VERSION
8
+ gem.homepage = 'https://github.com/GlobalNamesArchitecture/biodiversity'
9
+ gem.license = 'MIT'
10
+ gem.summary = %Q{Parser of scientific names}
11
+ gem.description = %Q{Tools for biodiversity informatics}
12
+ gem.authors = ['Dmitry Mozzherin']
13
+ gem.email = 'dmozzherin@gmail.com'
14
+
15
+ gem.files = `git ls-files`.split("\n")
16
+ gem.executables = ['nnparse', 'parserver']
17
+ gem.require_paths = ['lib']
18
+
19
+ gem.add_runtime_dependency 'treetop', '~> 1.4'
20
+ gem.add_runtime_dependency 'parallel', '~> 0.7'
21
+ gem.add_runtime_dependency 'unicode_utils', '~> 1.4'
22
+
23
+ gem.add_development_dependency 'bundler', '~> 1.3'
24
+ gem.add_development_dependency 'rake', '~> 10.1'
25
+ gem.add_development_dependency 'rspec', '~> 2.14'
26
+ gem.add_development_dependency 'rr', '~> 1.1'
27
+ gem.add_development_dependency 'debugger', '~> 1.6'
28
+ end
data/lib/biodiversity.rb CHANGED
@@ -1,9 +1,15 @@
1
- require 'rubygems'
2
1
  require 'treetop'
2
+ require 'json'
3
+ require 'open-uri'
4
+ require_relative 'biodiversity/version'
5
+ require_relative 'biodiversity/parser'
6
+ require_relative 'biodiversity/guid'
3
7
 
4
- dir = File.dirname(__FILE__)
8
+ module Biodiversity
9
+ LSID_RESOLVER_URL = 'http://lsid.tdwg.org/'
10
+
11
+ def self.version
12
+ VERSION
13
+ end
14
+ end
5
15
 
6
- BIODIVERSITY_ROOT = File.join(dir, 'biodiversity')
7
- require File.join(dir, "/../conf/environment")
8
- require File.join(BIODIVERSITY_ROOT, "parser")
9
- require File.join(BIODIVERSITY_ROOT, "guid")
@@ -1,2 +1 @@
1
- dir = File.dirname(__FILE__)
2
- require File.join(dir, *%w[guid lsid])
1
+ require_relative 'guid/lsid'
@@ -1,5 +1,3 @@
1
- require 'open-uri'
2
-
3
1
  class LsidResolver
4
2
  def self.resolve(lsid)
5
3
  http_get_rdf(lsid)
@@ -8,11 +6,11 @@ class LsidResolver
8
6
  protected
9
7
  def self.http_get_rdf(lsid)
10
8
  rdf = ''
11
- open(LSID_RESOLVER_URL + lsid) do |f|
9
+ open(Biodiversity::LSID_RESOLVER_URL + lsid) do |f|
12
10
  f.each do |line|
13
11
  rdf += line if !line.strip.blank?
14
12
  end
15
13
  end
16
14
  rdf
17
15
  end
18
- end
16
+ end
@@ -1,10 +1,7 @@
1
1
  # encoding: UTF-8
2
- dir = File.dirname(__FILE__)
3
- require File.join(dir, *%w[parser scientific_name_clean])
4
- require File.join(dir, *%w[parser scientific_name_dirty])
5
- require File.join(dir, *%w[parser scientific_name_canonical])
6
- require 'rubygems'
7
- require 'json'
2
+ require_relative 'parser/scientific_name_clean'
3
+ require_relative 'parser/scientific_name_dirty'
4
+ require_relative 'parser/scientific_name_canonical'
8
5
 
9
6
  module PreProcessor
10
7
  NOTES = /\s+(species\s+group|species\s+complex|group|author)\b.*$/i
@@ -108,10 +105,6 @@ end
108
105
  # end
109
106
 
110
107
  class ScientificNameParser
111
- VERSION = open(File.join(File.dirname(__FILE__),
112
- '..',
113
- '..',
114
- 'VERSION')).readline.strip
115
108
 
116
109
  FAILED_RESULT = ->(name) do
117
110
  { scientificName:
@@ -120,7 +113,7 @@ class ScientificNameParser
120
113
  end
121
114
 
122
115
  def self.version
123
- VERSION
116
+ Biodiversity::VERSION
124
117
  end
125
118
 
126
119
  def self.fix_case(name_string)
@@ -213,7 +206,7 @@ class ScientificNameParser
213
206
  def @parsed.all(opts = {})
214
207
  canonical_with_rank = !!opts[:canonical_with_rank]
215
208
  parsed = self.class != Hash
216
- res = { parsed: parsed, parser_version: ScientificNameParser::VERSION}
209
+ res = { parsed: parsed, parser_version: ScientificNameParser::version}
217
210
  if parsed
218
211
  hybrid = self.hybrid rescue false
219
212
  res.merge!({
@@ -0,0 +1,3 @@
1
+ module Biodiversity
2
+ VERSION = '3.1.3'
3
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ describe Biodiversity do
4
+ it 'should have version' do
5
+ version = Biodiversity::VERSION
6
+ version.should =~ /^\d+\.\d+\.\d+/
7
+ version.should == Biodiversity.version
8
+ end
9
+ end
@@ -1,11 +1,9 @@
1
- dir = File.dirname("__FILE__")
2
- require 'rubygems'
3
- require File.expand_path(dir + "../../conf/environment")
4
- require File.expand_path(dir + "../../lib/biodiversity/guid")
1
+ require_relative '../spec_helper'
5
2
 
6
3
  describe LsidResolver do
7
- it "should return RFD document from lsid" do
8
- lsid = "urn:lsid:ubio.org:classificationbank:2232671"
9
- LsidResolver.resolve(lsid).class.should == "".class
4
+ it 'should return RFD document from lsid' do
5
+ lsid = 'urn:lsid:ubio.org:classificationbank:2232671'
6
+ stub(LsidResolver).resolve(lsid) {''}
7
+ LsidResolver.resolve(lsid).class.should == String
10
8
  end
11
9
  end
@@ -0,0 +1,37 @@
1
+ # encoding: UTF-8
2
+ require_relative '../spec_helper'
3
+
4
+ describe ScientificNameCanonical do
5
+ before(:all) do
6
+ set_parser(ScientificNameCanonicalParser.new)
7
+ end
8
+
9
+ it 'should parse names with valid name part and unparseable rest' do
10
+ [
11
+ ['Morea ssjjlajajaj324$33 234243242','Morea',
12
+ [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
13
+ ['Morea (Morea) Burt 2342343242 23424322342 23424234',
14
+ 'Morea (Morea)', [{:genus=>{:string=>"Morea"},
15
+ :infragenus=>{:string=>"Morea"}}],
16
+ {0=>["genus", 5], 7=>["infragenus", 12]}],
17
+ ['Morea (Morea) burtius 2342343242 23424322342 23424234',
18
+ 'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"},
19
+ :infragenus=>{:string=>"Morea"},
20
+ :species=>{:string=>"burtius"}}],
21
+ {0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
22
+ ['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',
23
+ [{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}],
24
+ {0=>["genus", 6], 7=>["species", 17]} ],
25
+ ['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',
26
+ [{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}],
27
+ {0=>["genus", 11], 12=>["species", 20]}]
28
+ ].each do |n|
29
+ parse(n[0]).should_not be_nil
30
+ value(n[0]).should == n[1]
31
+ details(n[0]).should == n[2]
32
+ pos(n[0]).should == n[3]
33
+ parse(n[0]).hybrid.should be_false
34
+ end
35
+ end
36
+
37
+ end
@@ -1,6 +1,5 @@
1
1
  # encoding: UTF-8
2
- dir = File.dirname("__FILE__")
3
- require File.expand_path(dir + '../../spec/parser/spec_helper')
2
+ require_relative '../spec_helper'
4
3
 
5
4
 
6
5
  describe ScientificNameClean do
@@ -20,19 +19,36 @@ describe ScientificNameClean do
20
19
  it 'should parse uninomial with author and year' do
21
20
  sn = 'Pseudocercospora Speg.'
22
21
  parse(sn).should_not be_nil
23
- details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora", :authorship=>"Speg.", :basionymAuthorTeam=>{:authorTeam=>"Speg.", :author=>["Speg."]}}}]
22
+ details(sn).should == [{:uninomial=>
23
+ {:string=>"Pseudocercospora",
24
+ :authorship=>"Speg.",
25
+ :basionymAuthorTeam=>
26
+ {:authorTeam=>"Speg.", :author=>["Speg."]}}}]
24
27
  pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
25
28
  sn = 'Pseudocercospora Spegazzini, 1910'
26
29
  parse(sn).should_not be_nil
27
30
  value(sn).should == 'Pseudocercospora Spegazzini 1910'
28
- details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora", :authorship=>"Spegazzini, 1910", :basionymAuthorTeam=>{:authorTeam=>"Spegazzini", :author=>["Spegazzini"], :year=>"1910"}}}]
29
- pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 27], 29=>["year", 33]}
31
+ details(sn).should == [{:uninomial=>
32
+ {:string=>"Pseudocercospora",
33
+ :authorship=>"Spegazzini, 1910",
34
+ :basionymAuthorTeam=>
35
+ {:authorTeam=>"Spegazzini",
36
+ :author=>["Spegazzini"], :year=>"1910"}}}]
37
+ pos(sn).should == {0=>["uninomial", 16],
38
+ 17=>["author_word", 27], 29=>["year", 33]}
30
39
  end
31
40
 
32
41
  it "should parse uninomials with uninomial ranks" do
33
42
  sn = "Epacridaceae trib. Archerieae Crayn & Quinn"
34
43
  parse(sn).should_not be_nil
35
- details(sn).should == [{:uninomial=>{:string=>"Epacridaceae"}, :rank_uninomials=>"trib.", :uninomial2=>{:string=>"Archerieae", :authorship=>"Crayn & Quinn", :basionymAuthorTeam=>{:authorTeam=>"Crayn & Quinn", :author=>["Crayn", "Quinn"]}}}]
44
+ details(sn).should == [{:uninomial=>
45
+ {:string=>"Epacridaceae"},
46
+ :rank_uninomials=>"trib.",
47
+ :uninomial2=>{:string=>"Archerieae",
48
+ :authorship=>"Crayn & Quinn",
49
+ :basionymAuthorTeam=>
50
+ {:authorTeam=>"Crayn & Quinn",
51
+ :author=>["Crayn", "Quinn"]}}}]
36
52
  end
37
53
 
38
54
  it 'should parse names with a valid 2 letter genus' do
@@ -65,7 +81,9 @@ describe ScientificNameClean do
65
81
  parse(sn).should_not be_nil
66
82
  value(sn).should == 'Pseudocercospora dendrobii'
67
83
  canonical(sn).should == 'Pseudocercospora dendrobii'
68
- details(sn).should == [{:genus=>{:string=>"Pseudocercospora"}, :species=>{:string=>"dendrobii"}}]
84
+ details(sn).should == [{:genus=>
85
+ {:string=>"Pseudocercospora"},
86
+ :species=>{:string=>"dendrobii"}}]
69
87
  pos(sn).should == {0=>["genus", 16], 21=>["species", 30]}
70
88
  end
71
89
 
@@ -76,7 +94,9 @@ describe ScientificNameClean do
76
94
  sn = 'Ps. dendrobii'
77
95
  parse(sn).should_not be_nil
78
96
  value(sn).should == 'Ps. dendrobii'
79
- details(sn).should == [{:genus=>{:string=>"Ps."}, :species=>{:string=>"dendrobii"}}]
97
+ details(sn).should == [{:genus=>
98
+ {:string=>"Ps."},
99
+ :species=>{:string=>"dendrobii"}}]
80
100
  end
81
101
 
82
102
 
@@ -87,8 +107,16 @@ describe ScientificNameClean do
87
107
  sn = "Platypus bicaudatulus Schedl, 1935h"
88
108
  parse(sn).should_not be_nil
89
109
  value(sn).should == "Platypus bicaudatulus Schedl 1935"
90
- details(sn).should == [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl, 1935h", :basionymAuthorTeam=>{:authorTeam=>"Schedl", :author=>["Schedl"], :year=>"1935"}}}]
91
- pos(sn).should == {0=>["genus", 8], 9=>["species", 21], 22=>["author_word", 28], 30=>["year", 35]}
110
+ details(sn).should == [{:genus=>
111
+ {:string=>"Platypus"},
112
+ :species=>{:string=>"bicaudatulus",
113
+ :authorship=>"Schedl, 1935h",
114
+ :basionymAuthorTeam=>
115
+ {:authorTeam=>"Schedl", :author=>["Schedl"],
116
+ :year=>"1935"}}}]
117
+ pos(sn).should == {0=>["genus", 8],
118
+ 9=>["species", 21], 22=>["author_word", 28],
119
+ 30=>["year", 35]}
92
120
  parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
93
121
  sn = "Platypus bicaudatulus Schedl (1935h)"
94
122
  parse(sn).should_not be_nil
@@ -1,6 +1,5 @@
1
1
  # encoding: UTF-8
2
- dir = File.dirname("__FILE__")
3
- require File.expand_path(dir + '../../spec/parser/spec_helper')
2
+ require_relative '../spec_helper'
4
3
 
5
4
  describe ScientificNameDirty do
6
5
  before(:all) do
@@ -15,7 +14,13 @@ describe ScientificNameDirty do
15
14
  sn = "Eichornia crassipes ( (Martius) ) Solms-Laub."
16
15
  parse(sn).should_not be_nil
17
16
  value(sn).should == "Eichornia crassipes (Martius) Solms-Laub."
18
- details(sn).should == [{:genus=>{:string=>"Eichornia"}, :species=>{:string=>"crassipes", :authorship=>"( (Martius) ) Solms-Laub.", :combinationAuthorTeam=>{:authorTeam=>"Solms-Laub.", :author=>["Solms-Laub."]}, :basionymAuthorTeam=>{:authorTeam=>"Martius", :author=>["Martius"]}}}]
17
+ details(sn).should == [{:genus=>{:string=>"Eichornia"},
18
+ :species=>{:string=>"crassipes",
19
+ :authorship=>"( (Martius) ) Solms-Laub.",
20
+ :combinationAuthorTeam=>{:authorTeam=>"Solms-Laub.",
21
+ :author=>["Solms-Laub."]},
22
+ :basionymAuthorTeam=>{:authorTeam=>"Martius",
23
+ :author=>["Martius"]}}}]
19
24
  pos(sn).should == {0=>["genus", 9], 10=>["species", 19], 23=>["author_word", 30], 34=>["author_word", 45]}
20
25
  end
21
26
 
@@ -1,8 +1,7 @@
1
1
  # encoding: utf-8
2
+
2
3
  #NOTE: this spec needs compiled treetop files.
3
- dir = File.dirname("__FILE__")
4
- require File.expand_path(dir + '../../spec/parser/spec_helper')
5
- require File.expand_path(dir + '../../lib/biodiversity/parser')
4
+ require_relative '../spec_helper'
6
5
 
7
6
  describe ScientificNameParser do
8
7
  before(:all) do
@@ -15,10 +14,10 @@ describe ScientificNameParser do
15
14
 
16
15
  it 'should ScientificNameParser::fix_case' do
17
16
  names = [
18
- ["QUERCUS ALBA", "Quercus alba"],
19
- ["QUERCUS (QUERCUS) ALBA", "Quercus (Quercus) alba"],
20
- ["QÜERCUS", "Qüercus"],
21
- ["PARDOSA MOéSTA", "Pardosa moésta"],
17
+ ['QUERCUS ALBA', 'Quercus alba'],
18
+ ['QUERCUS (QUERCUS) ALBA', 'Quercus (Quercus) alba'],
19
+ ['QÜERCUS', 'Qüercus'],
20
+ ['PARDOSA MOéSTA', 'Pardosa moésta'],
22
21
  ]
23
22
  names.each do |name, capitalization|
24
23
  ScientificNameParser::fix_case(name).should == capitalization
@@ -34,7 +33,7 @@ describe ScientificNameParser do
34
33
 
35
34
  # it 'should generate new test_file' do
36
35
  # new_test = open(File.expand_path(dir +
37
- # "../../spec/parser/test_data_new.txt"),'w')
36
+ # '../../spec/parser/test_data_new.txt'),'w')
38
37
  # read_test_file do |y|
39
38
  # if y[:comment]
40
39
  # new_test.write y[:comment]
@@ -52,20 +51,20 @@ describe ScientificNameParser do
52
51
  '"parser_version":"test_version","verbatim":"ddd sljlkj 3223452432"}}'
53
52
  end
54
53
 
55
- it "should show version when the flag :show_version set to true" do
54
+ it 'should show version when the flag :show_version set to true' do
56
55
  parse('Homo sapiens')[:scientificName][:parser_version].should_not be_nil
57
56
  end
58
57
 
59
- it "should show version for not spelled names" do
58
+ it 'should show version for not spelled names' do
60
59
  parse('not_a_name')[:scientificName][:parser_version].should_not be_nil
61
60
  end
62
61
 
63
- it "should generate version for viruses" do
62
+ it 'should generate version for viruses' do
64
63
  parse('Nile virus')[:scientificName][:parser_version].should_not be_nil
65
64
  end
66
65
  end
67
66
 
68
- describe "ScientificNameParser with ranked canonicals" do
67
+ describe 'ScientificNameParser with ranked canonicals' do
69
68
  before(:all) do
70
69
  @parser = ScientificNameParser.new(canonical_with_rank: true)
71
70
  end
@@ -101,12 +100,12 @@ describe "ScientificNameParser with ranked canonicals" do
101
100
  end
102
101
 
103
102
  describe ParallelParser do
104
- it "should find number of cpus" do
103
+ it 'should find number of cpus' do
105
104
  pparser = ParallelParser.new
106
105
  pparser.cpu_num.should > 0
107
106
  end
108
107
 
109
- it "should parse several names in parallel" do
108
+ it 'should parse several names in parallel' do
110
109
  names = []
111
110
  read_test_file { |n| names << (n[:name]) if n[:name] }
112
111
  names.uniq!
@@ -116,7 +115,7 @@ describe ParallelParser do
116
115
  res.keys.size.should == names.size
117
116
  end
118
117
 
119
- it "should parse several names in parallel with given num of processes" do
118
+ it 'should parse several names in parallel with given num of processes' do
120
119
  names = []
121
120
  read_test_file { |n| names << (n[:name]) if n[:name] }
122
121
  names.uniq!
@@ -126,8 +125,8 @@ describe ParallelParser do
126
125
  res.keys.size.should == names.size
127
126
  end
128
127
 
129
- it "should have parsed name in native ruby format and in returned as \
130
- a hash with name as a key and parsed data as value" do
128
+ it 'should have parsed name in native ruby format and in returned as \
129
+ a hash with name as a key and parsed data as value' do
131
130
  names = []
132
131
  read_test_file { |n| names << (n[:name]) if n[:name] }
133
132
  names.uniq!
data/spec/spec_helper.rb CHANGED
@@ -0,0 +1,82 @@
1
+ require 'yaml'
2
+ require 'treetop'
3
+ require 'json'
4
+ require 'biodiversity'
5
+
6
+
7
+ RSpec.configure do |c|
8
+ c.mock_with :rr
9
+ end
10
+
11
+ dir = File.dirname(__FILE__)
12
+ Treetop.load(File.expand_path(File.join(dir,
13
+ '../lib/biodiversity/parser/scientific_name_clean')))
14
+ Treetop.load(File.expand_path(File.join(dir,
15
+ '../lib/biodiversity/parser/scientific_name_dirty')))
16
+ Treetop.load(File.expand_path(File.join(dir,
17
+ '../lib/biodiversity/parser/scientific_name_canonical')))
18
+
19
+ PARSER_TEST_VERSION = 'test_version'
20
+
21
+ def set_parser(parser)
22
+ @parser = parser
23
+ end
24
+
25
+ def parse(input)
26
+ @parser.parse(input)
27
+ end
28
+
29
+ def value(input)
30
+ parse(input).value
31
+ end
32
+
33
+ def canonical(input)
34
+ parse(input).canonical
35
+ end
36
+
37
+ def details(input)
38
+ parse(input).details
39
+ end
40
+
41
+ def pos(input)
42
+ parse(input).pos
43
+ end
44
+
45
+ def json(input)
46
+ parse(input).
47
+ to_json.gsub(/"parser_version":"[^"]*"/,
48
+ %Q["parser_version":"#{PARSER_TEST_VERSION}"])
49
+ end
50
+
51
+ def debug(input)
52
+ res = parse(input)
53
+ puts '<pre>'
54
+ if res
55
+ puts 'success!'
56
+ puts res.inspect
57
+ else
58
+ puts input
59
+ val = @parser.failure_reason.to_s.match(/column [0-9]*/).
60
+ to_s.gsub(/column /,'').to_i
61
+ print ('-' * (val - 1))
62
+ print "^ Computer says 'no'!\n"
63
+ puts @parser.failure_reason
64
+ puts @parser.to_yaml
65
+ end
66
+ puts '</pre>'
67
+ end
68
+
69
+ def read_test_file
70
+ f = open(File.expand_path(File.join(File.dirname(__FILE__),
71
+ 'parser/test_data.txt')))
72
+ f.each do |line|
73
+ name, jsn = line.split("|")
74
+ if line.match(/^\s*#/) == nil && name && jsn
75
+ yield({:name => name, :jsn => jsn})
76
+ else
77
+ yield({:comment => line})
78
+ end
79
+ end
80
+ end
81
+
82
+
metadata CHANGED
@@ -1,36 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity19
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
5
- prerelease:
4
+ version: 3.1.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - Dmitry Mozzherin
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-21 00:00:00.000000000 Z
11
+ date: 2013-12-18 00:00:00.000000000 Z
13
12
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: rake
16
- requirement: !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '10.0'
22
- type: :runtime
23
- prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: '10.0'
30
13
  - !ruby/object:Gem::Dependency
31
14
  name: treetop
32
15
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
16
  requirements:
35
17
  - - ~>
36
18
  - !ruby/object:Gem::Version
@@ -38,7 +20,6 @@ dependencies:
38
20
  type: :runtime
39
21
  prerelease: false
40
22
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
23
  requirements:
43
24
  - - ~>
44
25
  - !ruby/object:Gem::Version
@@ -46,23 +27,20 @@ dependencies:
46
27
  - !ruby/object:Gem::Dependency
47
28
  name: parallel
48
29
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
30
  requirements:
51
31
  - - ~>
52
32
  - !ruby/object:Gem::Version
53
- version: '0.6'
33
+ version: '0.7'
54
34
  type: :runtime
55
35
  prerelease: false
56
36
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
37
  requirements:
59
38
  - - ~>
60
39
  - !ruby/object:Gem::Version
61
- version: '0.6'
40
+ version: '0.7'
62
41
  - !ruby/object:Gem::Dependency
63
42
  name: unicode_utils
64
43
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
44
  requirements:
67
45
  - - ~>
68
46
  - !ruby/object:Gem::Version
@@ -70,114 +48,100 @@ dependencies:
70
48
  type: :runtime
71
49
  prerelease: false
72
50
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
51
  requirements:
75
52
  - - ~>
76
53
  - !ruby/object:Gem::Version
77
54
  version: '1.4'
78
55
  - !ruby/object:Gem::Dependency
79
- name: debugger
56
+ name: bundler
80
57
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
58
  requirements:
83
59
  - - ~>
84
60
  - !ruby/object:Gem::Version
85
- version: '1.5'
61
+ version: '1.3'
86
62
  type: :development
87
63
  prerelease: false
88
64
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
65
  requirements:
91
66
  - - ~>
92
67
  - !ruby/object:Gem::Version
93
- version: '1.5'
68
+ version: '1.3'
94
69
  - !ruby/object:Gem::Dependency
95
- name: jeweler
70
+ name: rake
96
71
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
72
  requirements:
99
73
  - - ~>
100
74
  - !ruby/object:Gem::Version
101
- version: '1.8'
75
+ version: '10.1'
102
76
  type: :development
103
77
  prerelease: false
104
78
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
79
  requirements:
107
80
  - - ~>
108
81
  - !ruby/object:Gem::Version
109
- version: '1.8'
82
+ version: '10.1'
110
83
  - !ruby/object:Gem::Dependency
111
- name: treetop
84
+ name: rspec
112
85
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
86
  requirements:
115
- - - ! '>='
87
+ - - ~>
116
88
  - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :runtime
89
+ version: '2.14'
90
+ type: :development
119
91
  prerelease: false
120
92
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
93
  requirements:
123
- - - ! '>='
94
+ - - ~>
124
95
  - !ruby/object:Gem::Version
125
- version: '0'
96
+ version: '2.14'
126
97
  - !ruby/object:Gem::Dependency
127
- name: parallel
98
+ name: rr
128
99
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
100
  requirements:
131
- - - ! '>='
101
+ - - ~>
132
102
  - !ruby/object:Gem::Version
133
- version: '0'
134
- type: :runtime
103
+ version: '1.1'
104
+ type: :development
135
105
  prerelease: false
136
106
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
107
  requirements:
139
- - - ! '>='
108
+ - - ~>
140
109
  - !ruby/object:Gem::Version
141
- version: '0'
110
+ version: '1.1'
142
111
  - !ruby/object:Gem::Dependency
143
- name: rspec
112
+ name: debugger
144
113
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
114
  requirements:
147
- - - ! '>='
115
+ - - ~>
148
116
  - !ruby/object:Gem::Version
149
- version: '0'
117
+ version: '1.6'
150
118
  type: :development
151
119
  prerelease: false
152
120
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
121
  requirements:
155
- - - ! '>='
122
+ - - ~>
156
123
  - !ruby/object:Gem::Version
157
- version: '0'
124
+ version: '1.6'
158
125
  description: Tools for biodiversity informatics
159
126
  email: dmozzherin@gmail.com
160
127
  executables:
161
128
  - nnparse
162
129
  - parserver
163
130
  extensions: []
164
- extra_rdoc_files:
165
- - LICENSE
166
- - README.md
131
+ extra_rdoc_files: []
167
132
  files:
168
133
  - .document
169
- - .rvmrc
134
+ - .gitignore
135
+ - .ruby-version
170
136
  - .travis.yml
171
137
  - CHANGELOG
172
138
  - Gemfile
173
- - Gemfile.lock
174
139
  - LICENSE
175
140
  - README.md
176
141
  - Rakefile
177
- - VERSION
178
142
  - bin/nnparse
179
143
  - bin/parserver
180
- - conf/environment.rb
144
+ - biodiversity.gemspec
181
145
  - examples/socket_client.rb
182
146
  - lib/biodiversity.rb
183
147
  - lib/biodiversity/guid.rb
@@ -186,42 +150,39 @@ files:
186
150
  - lib/biodiversity/parser/scientific_name_canonical.treetop
187
151
  - lib/biodiversity/parser/scientific_name_clean.treetop
188
152
  - lib/biodiversity/parser/scientific_name_dirty.treetop
189
- - pkg/.gitignore
153
+ - lib/biodiversity/version.rb
154
+ - pkg/.gitkeep
190
155
  - spec/biodiversity_spec.rb
191
156
  - spec/guid/lsid.spec.rb
192
- - spec/parser/scientific_name.spec.rb
193
- - spec/parser/scientific_name_canonical.spec.rb
194
- - spec/parser/scientific_name_clean.spec.rb
195
- - spec/parser/scientific_name_dirty.spec.rb
196
- - spec/parser/spec_helper.rb
157
+ - spec/parser/scientific_name_canonical_spec.rb
158
+ - spec/parser/scientific_name_clean_spec.rb
159
+ - spec/parser/scientific_name_dirty_spec.rb
160
+ - spec/parser/scientific_name_spec.rb
197
161
  - spec/parser/test_data.txt
198
162
  - spec/parser/todo.txt
199
163
  - spec/spec_helper.rb
200
- homepage: http://github.com/GlobalNamesArchitecture/biodiversity
201
- licenses: []
164
+ homepage: https://github.com/GlobalNamesArchitecture/biodiversity
165
+ licenses:
166
+ - MIT
167
+ metadata: {}
202
168
  post_install_message:
203
169
  rdoc_options: []
204
170
  require_paths:
205
171
  - lib
206
172
  required_ruby_version: !ruby/object:Gem::Requirement
207
- none: false
208
173
  requirements:
209
- - - ! '>='
174
+ - - '>='
210
175
  - !ruby/object:Gem::Version
211
176
  version: '0'
212
- segments:
213
- - 0
214
- hash: 4103623909009962506
215
177
  required_rubygems_version: !ruby/object:Gem::Requirement
216
- none: false
217
178
  requirements:
218
- - - ! '>='
179
+ - - '>='
219
180
  - !ruby/object:Gem::Version
220
181
  version: '0'
221
182
  requirements: []
222
183
  rubyforge_project:
223
- rubygems_version: 1.8.25
184
+ rubygems_version: 2.0.14
224
185
  signing_key:
225
- specification_version: 3
186
+ specification_version: 4
226
187
  summary: Parser of scientific names
227
188
  test_files: []
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use ruby-1.9.3-p392@biodiversity --create
data/Gemfile.lock DELETED
@@ -1,47 +0,0 @@
1
- GEM
2
- remote: https://rubygems.org/
3
- specs:
4
- columnize (0.3.6)
5
- debugger (1.5.0)
6
- columnize (>= 0.3.1)
7
- debugger-linecache (~> 1.2.0)
8
- debugger-ruby_core_source (~> 1.2.0)
9
- debugger-linecache (1.2.0)
10
- debugger-ruby_core_source (1.2.0)
11
- diff-lcs (1.2.4)
12
- git (1.2.5)
13
- jeweler (1.8.4)
14
- bundler (~> 1.0)
15
- git (>= 1.2.5)
16
- rake
17
- rdoc
18
- json (1.7.7)
19
- parallel (0.6.4)
20
- polyglot (0.3.3)
21
- rake (10.0.4)
22
- rdoc (4.0.1)
23
- json (~> 1.4)
24
- rspec (2.13.0)
25
- rspec-core (~> 2.13.0)
26
- rspec-expectations (~> 2.13.0)
27
- rspec-mocks (~> 2.13.0)
28
- rspec-core (2.13.1)
29
- rspec-expectations (2.13.0)
30
- diff-lcs (>= 1.1.3, < 2.0)
31
- rspec-mocks (2.13.1)
32
- treetop (1.4.12)
33
- polyglot
34
- polyglot (>= 0.3.1)
35
- unicode_utils (1.4.0)
36
-
37
- PLATFORMS
38
- ruby
39
-
40
- DEPENDENCIES
41
- debugger (~> 1.5)
42
- jeweler (~> 1.8)
43
- parallel (~> 0.6)
44
- rake (~> 10.0)
45
- rspec (~> 2.13)
46
- treetop (~> 1.4)
47
- unicode_utils (~> 1.4)
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 3.1.2
data/conf/environment.rb DELETED
@@ -1,3 +0,0 @@
1
-
2
- #Constants
3
- LSID_RESOLVER_URL = "http://lsid.tdwg.org/"
data/pkg/.gitignore DELETED
File without changes
@@ -1,26 +0,0 @@
1
- # encoding: UTF-8
2
- dir = File.dirname("__FILE__")
3
- require File.expand_path(dir + '../../spec/parser/spec_helper')
4
-
5
- describe ScientificNameCanonical do
6
- before(:all) do
7
- set_parser(ScientificNameCanonicalParser.new)
8
- end
9
-
10
- it 'should parse names with valid name part and unparseable rest' do
11
- [
12
- ['Morea ssjjlajajaj324$33 234243242','Morea', [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
13
- ['Morea (Morea) Burt 2342343242 23424322342 23424234', 'Morea (Morea)', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}}], {0=>["genus", 5], 7=>["infragenus", 12]}],
14
- ['Morea (Morea) burtius 2342343242 23424322342 23424234', 'Morea (Morea) burtius', [{:genus=>{:string=>"Morea"}, :infragenus=>{:string=>"Morea"}, :species=>{:string=>"burtius"}}], {0=>["genus", 5], 7=>["infragenus", 12], 14=>["species", 21]}],
15
- ['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',[{:genus=>{:string=>"Moraea"}, :species=>{:string=>"spathulata"}}], {0=>["genus", 6], 7=>["species", 17]} ],
16
- ['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',[{:genus=>{:string=>"Verpericola"}, :species=>{:string=>"megasoma"}}], {0=>["genus", 11], 12=>["species", 20]}]
17
- ].each do |n|
18
- parse(n[0]).should_not be_nil
19
- value(n[0]).should == n[1]
20
- details(n[0]).should == n[2]
21
- pos(n[0]).should == n[3]
22
- parse(n[0]).hybrid.should be_false
23
- end
24
- end
25
-
26
- end
@@ -1,70 +0,0 @@
1
- dir = File.dirname("__FILE__")
2
- require 'rubygems'
3
- require 'yaml'
4
- require 'treetop'
5
- require 'json'
6
- require File.expand_path(dir + '../../lib/biodiversity/parser')
7
-
8
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
9
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
10
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_canonical'))
11
-
12
- PARSER_TEST_VERSION = "test_version"
13
-
14
- def set_parser(parser)
15
- @parser = parser
16
- end
17
-
18
- def parse(input)
19
- @parser.parse(input)
20
- end
21
-
22
- def value(input)
23
- parse(input).value
24
- end
25
-
26
- def canonical(input)
27
- parse(input).canonical
28
- end
29
-
30
- def details(input)
31
- parse(input).details
32
- end
33
-
34
- def pos(input)
35
- parse(input).pos
36
- end
37
-
38
- def json(input)
39
- parse(input).to_json.gsub(/"parser_version":"[^"]*"/, %Q["parser_version":"#{PARSER_TEST_VERSION}"])
40
- end
41
-
42
- def debug(input)
43
- res = parse(input)
44
- puts "<pre>"
45
- if res
46
- puts 'success!'
47
- puts res.inspect
48
- else
49
- puts input
50
- val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s.gsub(/column /,'').to_i
51
- print ("-" * (val - 1))
52
- print "^ Computer says 'no'!\n"
53
- puts @parser.failure_reason
54
- puts @parser.to_yaml
55
- end
56
- puts "</pre>"
57
- end
58
-
59
- def read_test_file
60
- f = open(File.expand_path(File.dirname("__FILE__") + "../../spec/parser/test_data.txt"))
61
- f.each do |line|
62
- name, jsn = line.split("|")
63
- if line.match(/^\s*#/) == nil && name && jsn
64
- yield({:name => name, :jsn => jsn})
65
- else
66
- yield({:comment => line})
67
- end
68
- end
69
- end
70
-