biodiversity 0.7.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -0
- data/Rakefile +4 -3
- data/VERSION +1 -1
- data/bin/parserver +83 -6
- data/examples/socket_client.rb +25 -0
- data/spec/guid/lsid.spec.rb +1 -2
- data/spec/parser/spec_helper.rb +0 -1
- metadata +8 -14
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -2,14 +2,15 @@ dir = File.dirname(__FILE__)
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'rake'
|
4
4
|
#$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
|
5
|
-
require '
|
5
|
+
require 'rspec/core'
|
6
|
+
require 'rspec/core/rake_task'
|
6
7
|
|
7
8
|
#Gem::manage_gems
|
8
9
|
#require 'rake/gempackagetask'
|
9
10
|
|
10
11
|
task :default => :spec
|
11
12
|
|
12
|
-
|
13
|
+
RSpec::Core::RakeTask.new do |t|
|
13
14
|
t.pattern = 'spec/**/*spec.rb'
|
14
15
|
end
|
15
16
|
|
@@ -50,7 +51,7 @@ task :tt do
|
|
50
51
|
f.each_with_index do |l, i|
|
51
52
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
52
53
|
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
53
|
-
next
|
54
|
+
next
|
54
55
|
else
|
55
56
|
skip_head = false
|
56
57
|
rfn.write(l)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.3
|
1
|
+
1.0.0
|
data/bin/parserver
CHANGED
@@ -1,18 +1,95 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'rubygems'
|
3
|
+
require 'optparse'
|
3
4
|
require 'socket'
|
4
5
|
require 'biodiversity' # Get sockets from stdlib
|
5
|
-
|
6
|
+
|
7
|
+
DEFAULT_PORT = 4334
|
8
|
+
RUBY_VERSION_INT = RUBY_VERSION.split(".")[0..1].join('').to_i
|
9
|
+
OPTIONS = {
|
10
|
+
:output => "json",
|
11
|
+
:port => DEFAULT_PORT
|
12
|
+
}
|
13
|
+
|
14
|
+
ARGV.options do |opts|
|
15
|
+
script_name = File.basename($0)
|
16
|
+
opts.banner = "Usage: ruby #{script_name} [options]"
|
17
|
+
|
18
|
+
opts.separator ""
|
19
|
+
|
20
|
+
opts.on("-o", "--output=output", String,
|
21
|
+
"Specifies the type of the output:
|
22
|
+
json - parsed results in json
|
23
|
+
canonical - canonical version
|
24
|
+
canonical_with_rank - canonical with rank",
|
25
|
+
"Default: json") { |OPTIONS[:output]| }
|
26
|
+
|
27
|
+
opts.separator ""
|
28
|
+
|
29
|
+
opts.on("-p", "--port=port", String,
|
30
|
+
"Specifies the port number",
|
31
|
+
"Default: #{DEFAULT_PORT}") { |OPTIONS[:port]| }
|
32
|
+
|
33
|
+
opts.separator ""
|
34
|
+
|
35
|
+
opts.on("-h", "--help",
|
36
|
+
"Show this help message.") { puts opts; exit }
|
37
|
+
|
38
|
+
opts.parse!
|
39
|
+
end
|
40
|
+
|
41
|
+
OPTIONS[:output] = "json" unless ['canonical', 'canonical_with_rank'].include?(OPTIONS[:output])
|
42
|
+
OPTIONS[:port] = (OPTIONS[:port].to_i > 0 ? OPTIONS[:port].to_i : DEFAULT_PORT)
|
43
|
+
|
44
|
+
def parser_error(name_string)
|
45
|
+
{:scientificName => {:parsed => false, :verbatim => name_string, :error => 'Parser error'}}
|
46
|
+
end
|
47
|
+
|
48
|
+
def get_output(name_string, parser)
|
49
|
+
begin
|
50
|
+
if RUBY_VERSION_INT < 19
|
51
|
+
old_kcode = $KCODE
|
52
|
+
$KCODE = 'NONE'
|
53
|
+
end
|
54
|
+
parsed = parser.parse(name_string)
|
55
|
+
if RUBY_VERSION_INT < 19
|
56
|
+
$KCODE = old_kcode
|
57
|
+
end
|
58
|
+
rescue
|
59
|
+
parsed = parser_error(name_string)
|
60
|
+
end
|
61
|
+
output = OPTIONS[:output]
|
62
|
+
return parsed.to_json if output == 'json'
|
63
|
+
canonical = parsed[:scientificName][:canonical]
|
64
|
+
return canonical.to_s if output == 'canonical' || canonical == nil || parsed[:scientificName][:hybrid] || !parsed[:scientificName][:parsed]
|
65
|
+
parts = parsed[:scientificName][:canonical].split(" ")
|
66
|
+
|
67
|
+
if parts.size > 2 && parsed[:scientificName][:details][0][:infraspecies]
|
68
|
+
name_ary = parts[0..1]
|
69
|
+
parsed[:scientificName][:details][0][:infraspecies].each do |data|
|
70
|
+
name_ary << (data[:rank] && data[:rank] != 'n/a'? "#{data[:rank]} #{data[:string]}" : data[:string])
|
71
|
+
end
|
72
|
+
canonical = name_ary.join(" ")
|
73
|
+
end
|
74
|
+
canonical
|
75
|
+
end
|
76
|
+
|
77
|
+
puts "Running parser service on port #{OPTIONS[:port]}, output type is '#{OPTIONS[:output]}'"
|
6
78
|
parser = ScientificNameParser.new
|
7
|
-
server = TCPServer.open(
|
79
|
+
server = TCPServer.open(OPTIONS[:port]) # Socket to listen on a port
|
8
80
|
loop do # Servers run forever
|
9
81
|
client = server.accept # Wait for a client to connect
|
10
|
-
while
|
11
|
-
|
82
|
+
while true
|
83
|
+
begin
|
84
|
+
a = client.readline
|
85
|
+
if ['end','exit','q', '.'].include? a.strip
|
86
|
+
client.close
|
87
|
+
break
|
88
|
+
end
|
89
|
+
client.puts get_output(a, parser)
|
90
|
+
rescue EOFError
|
12
91
|
client.close
|
13
92
|
break
|
14
93
|
end
|
15
|
-
client.puts parser.parse(a).to_json
|
16
94
|
end
|
17
95
|
end
|
18
|
-
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'socket'
|
3
|
+
|
4
|
+
host = 'localhost'
|
5
|
+
port = 4334
|
6
|
+
|
7
|
+
f = open('10000_names.txt')
|
8
|
+
w = open('output.txt', 'w')
|
9
|
+
s = TCPSocket.open(host, port)
|
10
|
+
|
11
|
+
f.each_with_index do |line, i|
|
12
|
+
puts i if i % 1000 == 0
|
13
|
+
line = line.strip
|
14
|
+
s.puts(line.strip)
|
15
|
+
res = s.gets
|
16
|
+
if res && res.split(" ").size > 3
|
17
|
+
res = res.strip
|
18
|
+
w.write(line + "\n")
|
19
|
+
w.write(res + "\n")
|
20
|
+
w.write("\n")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
s.close
|
25
|
+
|
data/spec/guid/lsid.spec.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
dir = File.dirname("__FILE__")
|
2
2
|
require 'rubygems'
|
3
|
-
require 'spec'
|
4
3
|
require File.expand_path(dir + "../../conf/environment")
|
5
4
|
require File.expand_path(dir + "../../lib/biodiversity/guid")
|
6
5
|
|
7
|
-
describe LsidResolver do
|
6
|
+
describe LsidResolver do
|
8
7
|
it "should return RFD document from lsid" do
|
9
8
|
lsid = "urn:lsid:ubio.org:classificationbank:2232671"
|
10
9
|
LsidResolver.resolve(lsid).class.should == "".class
|
data/spec/parser/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
- 7
|
9
|
-
- 3
|
10
|
-
version: 0.7.3
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-07-09 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -79,6 +79,7 @@ files:
|
|
79
79
|
- bin/nnparse
|
80
80
|
- bin/parserver
|
81
81
|
- conf/environment.rb
|
82
|
+
- examples/socket_client.rb
|
82
83
|
- lib/biodiversity.rb
|
83
84
|
- lib/biodiversity/guid.rb
|
84
85
|
- lib/biodiversity/guid/lsid.rb
|
@@ -130,12 +131,5 @@ rubygems_version: 1.3.7
|
|
130
131
|
signing_key:
|
131
132
|
specification_version: 3
|
132
133
|
summary: Parser of scientific names
|
133
|
-
test_files:
|
134
|
-
|
135
|
-
- spec/guid/lsid.spec.rb
|
136
|
-
- spec/parser/scientific_name.spec.rb
|
137
|
-
- spec/parser/scientific_name_canonical.spec.rb
|
138
|
-
- spec/parser/scientific_name_clean.spec.rb
|
139
|
-
- spec/parser/scientific_name_dirty.spec.rb
|
140
|
-
- spec/parser/spec_helper.rb
|
141
|
-
- spec/spec_helper.rb
|
134
|
+
test_files: []
|
135
|
+
|