biodiversity19 0.5.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ *.swp
7
+ *.swo
8
+ biodiversity*.gem
9
+ *json
10
+ *xml
11
+ tmp
12
+ .DS_Store
13
+ spec/parser/test_data_new.txt
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Dmitry Mozzherin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,44 @@
1
+ = Biodiversity
2
+
3
+ Parses a species' scientific name and breaks it into its elements.
4
+
5
+ == Installation
6
+
7
+ To install the gem you need RubyGems >= 1.2.0
8
+
9
+ $ gem sources -a http://gems.github.com (you only have to do this once)
10
+ $ sudo gem install dimus-biodiversity
11
+
12
+ == Example usage
13
+
14
+ You can parse a file with species names from the command line. The file should contain one scientific name per line.
15
+
16
+ nnparse file_with_names
17
+
18
+ You can use it as a library
19
+
20
+ require 'biodiversity'
21
+
22
+ parser = ScientificNameParser.new
23
+
24
+ # to parse a scientific name into a ruby hash
25
+ parser.parse("Plantago major")
26
+
27
+ #to get json representation
28
+ parser.parse("Plantago").to_json
29
+ #or
30
+ parser.parse("Plantago")
31
+ parser.all_json
32
+
33
+ # to clean name up
34
+ parser.parse(" Plantago major ")[:scientificName][:normalized]
35
+
36
+ # to get only cleaned up latin part of the name
37
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
38
+
39
+ # to get detailed information about elements of the name
40
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
41
+
42
+ # to resolve lsid and get back RDF file
43
+ LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
44
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
+ dir = File.dirname(__FILE__)
2
+ require 'rubygems'
3
+ require 'rake'
4
+ #$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
5
+ require 'spec/rake/spectask'
6
+
7
+ #Gem::manage_gems
8
+ #require 'rake/gempackagetask'
9
+
10
+ task :default => :spec
11
+
12
+ Spec::Rake::SpecTask.new do |t|
13
+ t.pattern = 'spec/**/*spec.rb'
14
+ end
15
+
16
+
17
+ begin
18
+ require 'jeweler'
19
+ Jeweler::Tasks.new do |gem|
20
+ gem.name = "biodiversity19"
21
+ gem.summary = 'Parser of scientific names'
22
+ gem.description = 'Tools for biodiversity informatics for ruby 1.9'
23
+ gem.email = "dmozzherin@gmail.com"
24
+ gem.homepage = "http://github.com/dimus/biodiversity"
25
+ gem.authors = ["Dmitry Mozzherin"]
26
+ gem.has_rdoc = false
27
+ gem.bindir = 'bin'
28
+ gem.executables = ['nnparse']
29
+ gem.add_dependency('treetop')
30
+ gem.add_dependency('json') if RUBY_VERSION.split(".")[0..1].join('').to_i < 19
31
+ gem.add_development_dependency "rspec"
32
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
33
+ end
34
+ rescue LoadError
35
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
36
+ end
37
+
38
+ task :tt do
39
+ system("tt #{dir}/lib/biodiversity/parser/scientific_name_clean.treetop")
40
+ system("tt #{dir}/lib/biodiversity/parser/scientific_name_dirty.treetop")
41
+ system("tt #{dir}/lib/biodiversity/parser/scientific_name_canonical.treetop")
42
+ end
43
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.15
data/bin/nnparse ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
4
+
5
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
+ require 'biodiversity'
7
+ require 'json'
8
+
9
+
10
+ if ARGV.empty?
11
+ puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
12
+ exit
13
+ end
14
+
15
+ input = ARGV[0]
16
+ output = ARGV[1] || 'parsed.json'
17
+
18
+ ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
19
+
20
+ p = ScientificNameParser.new
21
+ o = open(output, 'w')
22
+ count = 0
23
+ puts 'Parsing...'
24
+ IO.foreach(input) do |line|
25
+ count += 1
26
+ puts("%s lines parsed" % count) if count % 10000 == 0
27
+ name = line.gsub(/^[\d]*\s*/, '').strip
28
+ begin
29
+ if ruby_min_version < 19
30
+ old_kcode = $KCODE
31
+ $KCODE = 'NONE'
32
+ end
33
+ p.parse(name)
34
+ parsed_data = p.parsed.all_json rescue {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
35
+ if ruby_min_version < 19
36
+ $KCODE = old_kcode
37
+ end
38
+ rescue
39
+ parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
40
+ end
41
+ o.write parsed_data + "\n"
42
+ end
43
+
data/bin/parserver ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'socket'
4
+ require 'biodiversity' # Get sockets from stdlib
5
+ parser = ScientificNameParser.new
6
+ server = TCPServer.open(4334) # Socket to listen on port 4334
7
+ loop do # Servers run forever
8
+ client = server.accept # Wait for a client to connect
9
+ while a = client.readline
10
+ client.close if ['end','exit','q', '.'].include? a.strip
11
+ client.puts parser.parse(a).to_json
12
+ end
13
+ end
14
+
@@ -0,0 +1,85 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{biodiversity}
8
+ s.version = "0.5.15"
9
+ # NOTE(review): name and description here differ from the Jeweler settings in the Rakefile ("biodiversity19", "... for ruby 1.9") - regenerate instead of editing by hand.
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Dmitry Mozzherin"]
12
+ s.date = %q{2010-03-25}
13
+ s.default_executable = %q{nnparse}
14
+ s.description = %q{Tools for biodiversity informatics}
15
+ s.email = %q{dmozzherin@gmail.com}
16
+ s.executables = ["nnparse"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/nnparse",
29
+ "bin/parserver",
30
+ "biodiversity.gemspec",
31
+ "conf/environment.rb",
32
+ "lib/biodiversity.rb",
33
+ "lib/biodiversity/guid.rb",
34
+ "lib/biodiversity/guid/lsid.rb",
35
+ "lib/biodiversity/parser.rb",
36
+ "lib/biodiversity/parser/scientific_name_canonical.rb",
37
+ "lib/biodiversity/parser/scientific_name_canonical.treetop",
38
+ "lib/biodiversity/parser/scientific_name_clean.rb",
39
+ "lib/biodiversity/parser/scientific_name_clean.treetop",
40
+ "lib/biodiversity/parser/scientific_name_dirty.rb",
41
+ "lib/biodiversity/parser/scientific_name_dirty.treetop",
42
+ "pkg/.gitignore",
43
+ "spec/biodiversity_spec.rb",
44
+ "spec/guid/lsid.spec.rb",
45
+ "spec/parser/scientific_name.spec.rb",
46
+ "spec/parser/scientific_name_canonical.spec.rb",
47
+ "spec/parser/scientific_name_clean.spec.rb",
48
+ "spec/parser/scientific_name_dirty.spec.rb",
49
+ "spec/parser/spec_helper.rb",
50
+ "spec/parser/test_data.txt",
51
+ "spec/spec_helper.rb"
52
+ ]
53
+ s.homepage = %q{http://github.com/dimus/biodiversity}
54
+ s.rdoc_options = ["--charset=UTF-8"]
55
+ s.require_paths = ["lib"]
56
+ s.rubygems_version = %q{1.3.6}
57
+ s.summary = %q{Parser of scientific names}
58
+ s.test_files = [
59
+ "spec/parser/scientific_name_dirty.spec.rb",
60
+ "spec/parser/scientific_name_canonical.spec.rb",
61
+ "spec/parser/scientific_name_clean.spec.rb",
62
+ "spec/parser/spec_helper.rb",
63
+ "spec/parser/scientific_name.spec.rb",
64
+ "spec/biodiversity_spec.rb",
65
+ "spec/guid/lsid.spec.rb",
66
+ "spec/spec_helper.rb"
67
+ ]
68
+
69
+ if s.respond_to? :specification_version then
70
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
71
+ s.specification_version = 3
72
+ # With RubyGems >= 1.2.0 treetop is a runtime and rspec a development dependency; otherwise both are added as plain dependencies.
73
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
74
+ s.add_runtime_dependency(%q<treetop>, [">= 0"])
75
+ s.add_development_dependency(%q<rspec>, [">= 0"])
76
+ else
77
+ s.add_dependency(%q<treetop>, [">= 0"])
78
+ s.add_dependency(%q<rspec>, [">= 0"])
79
+ end
80
+ else
81
+ s.add_dependency(%q<treetop>, [">= 0"])
82
+ s.add_dependency(%q<rspec>, [">= 0"])
83
+ end
84
+ end
85
+
@@ -0,0 +1,3 @@
1
+ # Constants
2
+ # Base URL of the LSID HTTP resolver; LsidResolver appends an LSID to it to fetch RDF.
3
+ LSID_RESOLVER_URL = "http://lsid.tdwg.org/"
@@ -0,0 +1,18 @@
1
+ require 'open-uri'
2
+
3
+ class LsidResolver
4
+ def self.resolve(lsid)
5
+ http_get_rdf(lsid)
6
+ end
7
+
8
+ protected
9
+ def self.http_get_rdf(lsid)
10
+ rdf = ''
11
+ open(LSID_RESOLVER_URL + lsid) do |f|
12
+ f.each do |line|
13
+ rdf += line if !line.strip.blank?
14
+ end
15
+ end
16
+ rdf
17
+ end
18
+ end
@@ -0,0 +1,2 @@
1
+ dir = File.dirname(__FILE__)
2
+ require File.join(dir, *%w[guid lsid])