biodiversity19 0.5.15
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +13 -0
- data/LICENSE +20 -0
- data/README.rdoc +44 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/bin/nnparse +43 -0
- data/bin/parserver +14 -0
- data/biodiversity.gemspec +85 -0
- data/conf/environment.rb +3 -0
- data/lib/biodiversity/guid/lsid.rb +18 -0
- data/lib/biodiversity/guid.rb +2 -0
- data/lib/biodiversity/parser/scientific_name_canonical.rb +475 -0
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +111 -0
- data/lib/biodiversity/parser/scientific_name_clean.rb +6142 -0
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1195 -0
- data/lib/biodiversity/parser/scientific_name_dirty.rb +1096 -0
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +211 -0
- data/lib/biodiversity/parser.rb +57 -0
- data/lib/biodiversity.rb +9 -0
- data/pkg/.gitignore +0 -0
- data/spec/biodiversity_spec.rb +0 -0
- data/spec/guid/lsid.spec.rb +12 -0
- data/spec/parser/scientific_name.spec.rb +35 -0
- data/spec/parser/scientific_name_canonical.spec.rb +27 -0
- data/spec/parser/scientific_name_clean.spec.rb +504 -0
- data/spec/parser/scientific_name_dirty.spec.rb +90 -0
- data/spec/parser/spec_helper.rb +69 -0
- data/spec/parser/test_data.txt +235 -0
- data/spec/spec_helper.rb +0 -0
- metadata +122 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Dmitry Mozzherin
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
= Biodiversity
|
2
|
+
|
3
|
+
Parses a species' scientific name and breaks it into its elements.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
To install the gem you need RubyGems >= 1.2.0.
|
8
|
+
|
9
|
+
$ gem sources -a http://gems.github.com (you only have to do this once)
|
10
|
+
$ sudo gem install dimus-biodiversity
|
11
|
+
|
12
|
+
== Example usage
|
13
|
+
|
14
|
+
You can parse a file of species names from the command line. The file should contain one scientific name per line.
|
15
|
+
|
16
|
+
nnparse file_with_names
|
17
|
+
|
18
|
+
You can also use it as a library:
|
19
|
+
|
20
|
+
require 'biodiversity'
|
21
|
+
|
22
|
+
parser = ScientificNameParser.new
|
23
|
+
|
24
|
+
# to parse a scientific name into a ruby hash
|
25
|
+
parser.parse("Plantago major")
|
26
|
+
|
27
|
+
# to get a JSON representation
|
28
|
+
parser.parse("Plantago").to_json
|
29
|
+
# or
|
30
|
+
parser.parse("Plantago")
|
31
|
+
parser.all_json
|
32
|
+
|
33
|
+
# to clean up a name
|
34
|
+
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
35
|
+
|
36
|
+
# to get only the cleaned-up Latin part of the name
|
37
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
|
38
|
+
|
39
|
+
# to get detailed information about elements of the name
|
40
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
41
|
+
|
42
|
+
# to resolve an LSID and get back an RDF file
|
43
|
+
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
44
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rake'
|
4
|
+
#$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
|
5
|
+
require 'spec/rake/spectask'
|
6
|
+
|
7
|
+
#Gem::manage_gems
|
8
|
+
#require 'rake/gempackagetask'
|
9
|
+
|
10
|
+
task :default => :spec
|
11
|
+
|
12
|
+
Spec::Rake::SpecTask.new do |t|
|
13
|
+
t.pattern = 'spec/**/*spec.rb'
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
begin
|
18
|
+
require 'jeweler'
|
19
|
+
Jeweler::Tasks.new do |gem|
|
20
|
+
gem.name = "biodiversity19"
|
21
|
+
gem.summary = 'Parser of scientific names'
|
22
|
+
gem.description = 'Tools for biodiversity informatics for ruby 1.9'
|
23
|
+
gem.email = "dmozzherin@gmail.com"
|
24
|
+
gem.homepage = "http://github.com/dimus/biodiversity"
|
25
|
+
gem.authors = ["Dmitry Mozzherin"]
|
26
|
+
gem.has_rdoc = false
|
27
|
+
gem.bindir = 'bin'
|
28
|
+
gem.executables = ['nnparse']
|
29
|
+
gem.add_dependency('treetop')
|
30
|
+
gem.add_dependency('json') if RUBY_VERSION.split(".")[0..1].join('').to_i < 19
|
31
|
+
gem.add_development_dependency "rspec"
|
32
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
33
|
+
end
|
34
|
+
rescue LoadError
|
35
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
36
|
+
end
|
37
|
+
|
38
|
+
task :tt do
|
39
|
+
system("tt #{dir}/lib/biodiversity/parser/scientific_name_clean.treetop")
|
40
|
+
system("tt #{dir}/lib/biodiversity/parser/scientific_name_dirty.treetop")
|
41
|
+
system("tt #{dir}/lib/biodiversity/parser/scientific_name_canonical.treetop")
|
42
|
+
end
|
43
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.5.15
|
data/bin/nnparse
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
6
|
+
require 'biodiversity'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
|
10
|
+
if ARGV.empty?
|
11
|
+
puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
|
12
|
+
exit
|
13
|
+
end
|
14
|
+
|
15
|
+
input = ARGV[0]
|
16
|
+
output = ARGV[1] || 'parsed.json'
|
17
|
+
|
18
|
+
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
|
19
|
+
|
20
|
+
p = ScientificNameParser.new
|
21
|
+
o = open(output, 'w')
|
22
|
+
count = 0
|
23
|
+
puts 'Parsing...'
|
24
|
+
IO.foreach(input) do |line|
|
25
|
+
count += 1
|
26
|
+
puts("%s lines parsed" % count) if count % 10000 == 0
|
27
|
+
name = line.gsub(/^[\d]*\s*/, '').strip
|
28
|
+
begin
|
29
|
+
if ruby_min_version < 19
|
30
|
+
old_kcode = $KCODE
|
31
|
+
$KCODE = 'NONE'
|
32
|
+
end
|
33
|
+
p.parse(name)
|
34
|
+
parsed_data = p.parsed.all_json rescue {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
35
|
+
if ruby_min_version < 19
|
36
|
+
$KCODE = old_kcode
|
37
|
+
end
|
38
|
+
rescue
|
39
|
+
parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
40
|
+
end
|
41
|
+
o.write parsed_data + "\n"
|
42
|
+
end
|
43
|
+
|
data/bin/parserver
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'socket'
|
4
|
+
require 'biodiversity' # Provides ScientificNameParser
|
5
|
+
parser = ScientificNameParser.new
|
6
|
+
server = TCPServer.open(4334) # Socket to listen on port 4334
|
7
|
+
loop do # Servers run forever
|
8
|
+
client = server.accept # Wait for a client to connect
|
9
|
+
while a = client.readline
|
10
|
+
client.close if ['end','exit','q', '.'].include? a.strip
|
11
|
+
client.puts parser.parse(a).to_json
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{biodiversity}
|
8
|
+
s.version = "0.5.15"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Dmitry Mozzherin"]
|
12
|
+
s.date = %q{2010-03-25}
|
13
|
+
s.default_executable = %q{nnparse}
|
14
|
+
s.description = %q{Tools for biodiversity informatics}
|
15
|
+
s.email = %q{dmozzherin@gmail.com}
|
16
|
+
s.executables = ["nnparse"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
".gitignore",
|
24
|
+
"LICENSE",
|
25
|
+
"README.rdoc",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"bin/nnparse",
|
29
|
+
"bin/parserver",
|
30
|
+
"biodiversity.gemspec",
|
31
|
+
"conf/environment.rb",
|
32
|
+
"lib/biodiversity.rb",
|
33
|
+
"lib/biodiversity/guid.rb",
|
34
|
+
"lib/biodiversity/guid/lsid.rb",
|
35
|
+
"lib/biodiversity/parser.rb",
|
36
|
+
"lib/biodiversity/parser/scientific_name_canonical.rb",
|
37
|
+
"lib/biodiversity/parser/scientific_name_canonical.treetop",
|
38
|
+
"lib/biodiversity/parser/scientific_name_clean.rb",
|
39
|
+
"lib/biodiversity/parser/scientific_name_clean.treetop",
|
40
|
+
"lib/biodiversity/parser/scientific_name_dirty.rb",
|
41
|
+
"lib/biodiversity/parser/scientific_name_dirty.treetop",
|
42
|
+
"pkg/.gitignore",
|
43
|
+
"spec/biodiversity_spec.rb",
|
44
|
+
"spec/guid/lsid.spec.rb",
|
45
|
+
"spec/parser/scientific_name.spec.rb",
|
46
|
+
"spec/parser/scientific_name_canonical.spec.rb",
|
47
|
+
"spec/parser/scientific_name_clean.spec.rb",
|
48
|
+
"spec/parser/scientific_name_dirty.spec.rb",
|
49
|
+
"spec/parser/spec_helper.rb",
|
50
|
+
"spec/parser/test_data.txt",
|
51
|
+
"spec/spec_helper.rb"
|
52
|
+
]
|
53
|
+
s.homepage = %q{http://github.com/dimus/biodiversity}
|
54
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
55
|
+
s.require_paths = ["lib"]
|
56
|
+
s.rubygems_version = %q{1.3.6}
|
57
|
+
s.summary = %q{Parser of scientific names}
|
58
|
+
s.test_files = [
|
59
|
+
"spec/parser/scientific_name_dirty.spec.rb",
|
60
|
+
"spec/parser/scientific_name_canonical.spec.rb",
|
61
|
+
"spec/parser/scientific_name_clean.spec.rb",
|
62
|
+
"spec/parser/spec_helper.rb",
|
63
|
+
"spec/parser/scientific_name.spec.rb",
|
64
|
+
"spec/biodiversity_spec.rb",
|
65
|
+
"spec/guid/lsid.spec.rb",
|
66
|
+
"spec/spec_helper.rb"
|
67
|
+
]
|
68
|
+
|
69
|
+
if s.respond_to? :specification_version then
|
70
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
71
|
+
s.specification_version = 3
|
72
|
+
|
73
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
74
|
+
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
75
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
76
|
+
else
|
77
|
+
s.add_dependency(%q<treetop>, [">= 0"])
|
78
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
79
|
+
end
|
80
|
+
else
|
81
|
+
s.add_dependency(%q<treetop>, [">= 0"])
|
82
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
data/conf/environment.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
class LsidResolver
|
4
|
+
def self.resolve(lsid)
|
5
|
+
http_get_rdf(lsid)
|
6
|
+
end
|
7
|
+
|
8
|
+
protected
|
9
|
+
def self.http_get_rdf(lsid)
|
10
|
+
rdf = ''
|
11
|
+
open(LSID_RESOLVER_URL + lsid) do |f|
|
12
|
+
f.each do |line|
|
13
|
+
rdf += line if !line.strip.blank?
|
14
|
+
end
|
15
|
+
end
|
16
|
+
rdf
|
17
|
+
end
|
18
|
+
end
|