biodiversity19 0.5.15

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ *.swp
7
+ *.swo
8
+ biodiversity*.gem
9
+ *json
10
+ *xml
11
+ tmp
12
+ .DS_Store
13
+ spec/parser/test_data_new.txt
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Dmitry Mozzherin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,44 @@
1
+ = Biodiversity
2
+
3
+ Parses a species' scientific name and breaks it into its elements.
4
+
5
+ == Installation
6
+
7
+ To install the gem you need RubyGems >= 1.2.0
8
+
9
+ $ gem sources -a http://gems.github.com (you only have to do this once)
10
+ $ sudo gem install dimus-biodiversity
11
+
12
+ == Example usage
13
+
14
+ You can parse a file of species names from the command line. The file should contain one scientific name per line.
15
+
16
+ nnparse file_with_names
17
+
18
+ You can use it as a library
19
+
20
+ require 'biodiversity'
21
+
22
+ parser = ScientificNameParser.new
23
+
24
+ # to parse a scientific name into a ruby hash
25
+ parser.parse("Plantago major")
26
+
27
+ # to get a JSON representation
28
+ parser.parse("Plantago").to_json
29
+ # or
30
+ parser.parse("Plantago")
31
+ parser.all_json
32
+
33
+ # to clean name up
34
+ parser.parse(" Plantago major ")[:scientificName][:normalized]
35
+
36
+ # to get only cleaned up latin part of the name
37
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
38
+
39
+ # to get detailed information about elements of the name
40
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
41
+
42
+ # to resolve lsid and get back RDF file
43
+ LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
44
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
# Build tasks for the gem: running the specs, packaging through Jeweler
# and regenerating the Treetop parsers.
dir = File.dirname(__FILE__)
require 'rubygems'
require 'rake'
#$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
require 'spec/rake/spectask'

#Gem::manage_gems
#require 'rake/gempackagetask'

task :default => :spec

# The spec task picks up every file matching spec/**/*spec.rb.
Spec::Rake::SpecTask.new do |t|
  t.pattern = 'spec/**/*spec.rb'
end

# Packaging is optional: when Jeweler cannot be loaded only a hint is printed
# and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "biodiversity19"
    gem.summary = 'Parser of scientific names'
    gem.description = 'Tools for biodiversity informatics for ruby 1.9'
    gem.email = "dmozzherin@gmail.com"
    gem.homepage = "http://github.com/dimus/biodiversity"
    gem.authors = ["Dmitry Mozzherin"]
    gem.has_rdoc = false
    gem.bindir = 'bin'
    gem.executables = ['nnparse']
    gem.add_dependency('treetop')
    # json is only declared as a dependency on Ruby older than 1.9.
    gem.add_dependency('json') if RUBY_VERSION.split(".")[0..1].join('').to_i < 19
    gem.add_development_dependency "rspec"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Compiles each Treetop grammar under lib/biodiversity/parser with `tt`.
# `sh` raises when the command fails, so a broken grammar aborts the task
# instead of being silently ignored; the argument-list form bypasses the
# shell, so a checkout path containing spaces still works.
task :tt do
  %w[scientific_name_clean scientific_name_dirty scientific_name_canonical].each do |grammar|
    sh 'tt', File.join(dir, 'lib', 'biodiversity', 'parser', "#{grammar}.treetop")
  end
end
43
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.15
data/bin/nnparse ADDED
@@ -0,0 +1,43 @@
1
#!/usr/bin/env ruby
# Command-line front end: reads a file with one scientific name per line and
# writes one JSON document per input line to the output file.
require 'rubygems'

# Activate an installed copy of the gem under any of its published names.
# Kernel#gem signals a missing gem with Gem::LoadError, which is a LoadError
# (not a StandardError), so it has to be rescued explicitly -- a bare
# `rescue` modifier would let it propagate and abort the script.
%w[dimus-biodiversity biodiversity biodiversity19].each do |gem_name|
  begin
    gem gem_name
    break
  rescue LoadError
    next
  end
end

# Put the lib directory next to this script at the front of the load path.
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
require 'biodiversity'
require 'json'


if ARGV.empty?
  puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
  exit
end

input = ARGV[0]
output = ARGV[1] || 'parsed.json'

# $KCODE is only switched on Ruby older than 1.9.
legacy_ruby = RUBY_VERSION.split(".")[0..1].join('').to_i < 19

parser = ScientificNameParser.new
count = 0
puts 'Parsing...'
# The block form closes (and flushes) the output file even if parsing aborts.
File.open(output, 'w') do |out|
  IO.foreach(input) do |line|
    count += 1
    puts("%s lines parsed" % count) if count % 10000 == 0
    # Drop an optional leading run of digits plus whitespace before the name.
    name = line.sub(/\A\d*\s*/, '').strip
    begin
      if legacy_ruby
        old_kcode = $KCODE
        $KCODE = 'NONE'
      end
      parser.parse(name)
      parsed_data = parser.parsed.all_json
    rescue
      # Any parser failure becomes a JSON error record for this line.
      parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
    ensure
      # Restore the previous $KCODE even when the parser raised.
      $KCODE = old_kcode if legacy_ruby
    end
    out.write parsed_data + "\n"
  end
end
43
+
data/bin/parserver ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Minimal TCP front end for the parser: every line a client sends is parsed
# and answered with one line of JSON.
require 'rubygems'
require 'socket'        # TCPServer comes from the stdlib socket library
require 'json'          # Hash#to_json
require 'biodiversity'

# A line consisting of one of these words ends the client's session.
quit_commands = ['end', 'exit', 'q', '.']

parser = ScientificNameParser.new
server = TCPServer.open(4334)   # Socket to listen on port 4334
loop do                         # Servers run forever
  client = server.accept        # Wait for a client to connect
  begin
    # gets returns nil once the client disconnects, which ends the session
    # normally (readline would raise EOFError and take the server down).
    while (line = client.gets)
      # Leave the loop before answering: nothing may be written to the
      # socket once the session is over.
      break if quit_commands.include?(line.strip)
      client.puts parser.parse(line).to_json
    end
  rescue IOError, SystemCallError => e
    # A connection that broke mid-conversation must not stop the server.
    warn "parserver: connection dropped (#{e.class}: #{e.message})"
  ensure
    client.close unless client.closed?
  end
end
14
+
@@ -0,0 +1,85 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{biodiversity}
8
+ s.version = "0.5.15"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Dmitry Mozzherin"]
12
+ s.date = %q{2010-03-25}
13
+ s.default_executable = %q{nnparse}
14
+ s.description = %q{Tools for biodiversity informatics}
15
+ s.email = %q{dmozzherin@gmail.com}
16
+ s.executables = ["nnparse"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/nnparse",
29
+ "bin/parserver",
30
+ "biodiversity.gemspec",
31
+ "conf/environment.rb",
32
+ "lib/biodiversity.rb",
33
+ "lib/biodiversity/guid.rb",
34
+ "lib/biodiversity/guid/lsid.rb",
35
+ "lib/biodiversity/parser.rb",
36
+ "lib/biodiversity/parser/scientific_name_canonical.rb",
37
+ "lib/biodiversity/parser/scientific_name_canonical.treetop",
38
+ "lib/biodiversity/parser/scientific_name_clean.rb",
39
+ "lib/biodiversity/parser/scientific_name_clean.treetop",
40
+ "lib/biodiversity/parser/scientific_name_dirty.rb",
41
+ "lib/biodiversity/parser/scientific_name_dirty.treetop",
42
+ "pkg/.gitignore",
43
+ "spec/biodiversity_spec.rb",
44
+ "spec/guid/lsid.spec.rb",
45
+ "spec/parser/scientific_name.spec.rb",
46
+ "spec/parser/scientific_name_canonical.spec.rb",
47
+ "spec/parser/scientific_name_clean.spec.rb",
48
+ "spec/parser/scientific_name_dirty.spec.rb",
49
+ "spec/parser/spec_helper.rb",
50
+ "spec/parser/test_data.txt",
51
+ "spec/spec_helper.rb"
52
+ ]
53
+ s.homepage = %q{http://github.com/dimus/biodiversity}
54
+ s.rdoc_options = ["--charset=UTF-8"]
55
+ s.require_paths = ["lib"]
56
+ s.rubygems_version = %q{1.3.6}
57
+ s.summary = %q{Parser of scientific names}
58
+ s.test_files = [
59
+ "spec/parser/scientific_name_dirty.spec.rb",
60
+ "spec/parser/scientific_name_canonical.spec.rb",
61
+ "spec/parser/scientific_name_clean.spec.rb",
62
+ "spec/parser/spec_helper.rb",
63
+ "spec/parser/scientific_name.spec.rb",
64
+ "spec/biodiversity_spec.rb",
65
+ "spec/guid/lsid.spec.rb",
66
+ "spec/spec_helper.rb"
67
+ ]
68
+
69
+ if s.respond_to? :specification_version then
70
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
71
+ s.specification_version = 3
72
+
73
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
74
+ s.add_runtime_dependency(%q<treetop>, [">= 0"])
75
+ s.add_development_dependency(%q<rspec>, [">= 0"])
76
+ else
77
+ s.add_dependency(%q<treetop>, [">= 0"])
78
+ s.add_dependency(%q<rspec>, [">= 0"])
79
+ end
80
+ else
81
+ s.add_dependency(%q<treetop>, [">= 0"])
82
+ s.add_dependency(%q<rspec>, [">= 0"])
83
+ end
84
+ end
85
+
@@ -0,0 +1,3 @@
1
+
2
# Constants

# Base URL of the LSID resolver; an LSID is appended to it to form the
# address that is fetched. Frozen so the shared string cannot be mutated.
LSID_RESOLVER_URL = "http://lsid.tdwg.org/".freeze
@@ -0,0 +1,18 @@
1
+ require 'open-uri'
2
+
3
# Resolves LSIDs by fetching LSID_RESOLVER_URL + lsid and returning the
# response text.
class LsidResolver
  # Fetches the document published for +lsid+.
  #
  # lsid - String such as "urn:lsid:ubio.org:classificationbank:2232671".
  #
  # Returns the response body as a String with blank lines removed.
  # Errors raised while opening the URL are not rescued here.
  def self.resolve(lsid)
    http_get_rdf(lsid)
  end

  # Downloads LSID_RESOLVER_URL + lsid and keeps only the non-blank lines.
  #
  # NOTE: a bare `protected` does not apply to `def self.` methods, so this
  # helper has always been publicly callable; it is kept that way for
  # backward compatibility, but callers should prefer .resolve.
  def self.http_get_rdf(lsid)
    url = LSID_RESOLVER_URL + lsid
    # open-uri exposes URI.open on newer Rubies, where Kernel#open no longer
    # accepts URLs; older Rubies only offer the Kernel.open entry point.
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(url) do |response|
      # String#empty? is core Ruby (blank? needs ActiveSupport).
      response.each_line.reject { |line| line.strip.empty? }.join
    end
  end
end
@@ -0,0 +1,2 @@
1
# Load the LSID support that lives in the guid/ directory next to this file.
require File.join(File.dirname(__FILE__), 'guid', 'lsid')