biodiversity19 0.7.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -0
- data/Rakefile +4 -3
- data/VERSION +1 -1
- data/bin/parserver +83 -6
- data/examples/socket_client.rb +25 -0
- data/spec/guid/lsid.spec.rb +1 -2
- data/spec/parser/spec_helper.rb +0 -1
- metadata +9 -30
- data/.gitignore +0 -16
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -2,14 +2,15 @@ dir = File.dirname(__FILE__)
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'rake'
|
4
4
|
#$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
|
5
|
-
require '
|
5
|
+
require 'rspec/core'
|
6
|
+
require 'rspec/core/rake_task'
|
6
7
|
|
7
8
|
#Gem::manage_gems
|
8
9
|
#require 'rake/gempackagetask'
|
9
10
|
|
10
11
|
task :default => :spec
|
11
12
|
|
12
|
-
|
13
|
+
RSpec::Core::RakeTask.new do |t|
|
13
14
|
t.pattern = 'spec/**/*spec.rb'
|
14
15
|
end
|
15
16
|
|
@@ -50,7 +51,7 @@ task :tt do
|
|
50
51
|
f.each_with_index do |l, i|
|
51
52
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
52
53
|
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
53
|
-
next
|
54
|
+
next
|
54
55
|
else
|
55
56
|
skip_head = false
|
56
57
|
rfn.write(l)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
data/bin/parserver
CHANGED
@@ -1,18 +1,95 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'rubygems'
|
3
|
+
require 'optparse'
|
3
4
|
require 'socket'
|
4
5
|
require 'biodiversity' # Get sockets from stdlib
|
5
|
-
|
6
|
+
|
7
|
+
DEFAULT_PORT = 4334
|
8
|
+
RUBY_VERSION_INT = RUBY_VERSION.split(".")[0..1].join('').to_i
|
9
|
+
OPTIONS = {
|
10
|
+
:output => "json",
|
11
|
+
:port => DEFAULT_PORT
|
12
|
+
}
|
13
|
+
|
14
|
+
ARGV.options do |opts|
|
15
|
+
script_name = File.basename($0)
|
16
|
+
opts.banner = "Usage: ruby #{script_name} [options]"
|
17
|
+
|
18
|
+
opts.separator ""
|
19
|
+
|
20
|
+
opts.on("-o", "--output=output", String,
|
21
|
+
"Specifies the type of the output:
|
22
|
+
json - parsed results in json
|
23
|
+
canonical - canonical version
|
24
|
+
canonical_with_rank - canonical with rank",
|
25
|
+
"Default: json") { |OPTIONS[:output]| }
|
26
|
+
|
27
|
+
opts.separator ""
|
28
|
+
|
29
|
+
opts.on("-p", "--port=port", String,
|
30
|
+
"Specifies the port number",
|
31
|
+
"Default: #{DEFAULT_PORT}") { |OPTIONS[:port]| }
|
32
|
+
|
33
|
+
opts.separator ""
|
34
|
+
|
35
|
+
opts.on("-h", "--help",
|
36
|
+
"Show this help message.") { puts opts; exit }
|
37
|
+
|
38
|
+
opts.parse!
|
39
|
+
end
|
40
|
+
|
41
|
+
OPTIONS[:output] = "json" unless ['canonical', 'canonical_with_rank'].include?(OPTIONS[:output])
|
42
|
+
OPTIONS[:port] = (OPTIONS[:port].to_i > 0 ? OPTIONS[:port].to_i : DEFAULT_PORT)
|
43
|
+
|
44
|
+
def parser_error(name_string)
|
45
|
+
{:scientificName => {:parsed => false, :verbatim => name_string, :error => 'Parser error'}}
|
46
|
+
end
|
47
|
+
|
48
|
+
def get_output(name_string, parser)
|
49
|
+
begin
|
50
|
+
if RUBY_VERSION_INT < 19
|
51
|
+
old_kcode = $KCODE
|
52
|
+
$KCODE = 'NONE'
|
53
|
+
end
|
54
|
+
parsed = parser.parse(name_string)
|
55
|
+
if RUBY_VERSION_INT < 19
|
56
|
+
$KCODE = old_kcode
|
57
|
+
end
|
58
|
+
rescue
|
59
|
+
parsed = parser_error(name_string)
|
60
|
+
end
|
61
|
+
output = OPTIONS[:output]
|
62
|
+
return parsed.to_json if output == 'json'
|
63
|
+
canonical = parsed[:scientificName][:canonical]
|
64
|
+
return canonical.to_s if output == 'canonical' || canonical == nil || parsed[:scientificName][:hybrid] || !parsed[:scientificName][:parsed]
|
65
|
+
parts = parsed[:scientificName][:canonical].split(" ")
|
66
|
+
|
67
|
+
if parts.size > 2 && parsed[:scientificName][:details][0][:infraspecies]
|
68
|
+
name_ary = parts[0..1]
|
69
|
+
parsed[:scientificName][:details][0][:infraspecies].each do |data|
|
70
|
+
name_ary << (data[:rank] && data[:rank] != 'n/a'? "#{data[:rank]} #{data[:string]}" : data[:string])
|
71
|
+
end
|
72
|
+
canonical = name_ary.join(" ")
|
73
|
+
end
|
74
|
+
canonical
|
75
|
+
end
|
76
|
+
|
77
|
+
puts "Running parser service on port #{OPTIONS[:port]}, output type is '#{OPTIONS[:output]}'"
|
6
78
|
parser = ScientificNameParser.new
|
7
|
-
server = TCPServer.open(
|
79
|
+
server = TCPServer.open(OPTIONS[:port]) # Socket to listen on a port
|
8
80
|
loop do # Servers run forever
|
9
81
|
client = server.accept # Wait for a client to connect
|
10
|
-
while
|
11
|
-
|
82
|
+
while true
|
83
|
+
begin
|
84
|
+
a = client.readline
|
85
|
+
if ['end','exit','q', '.'].include? a.strip
|
86
|
+
client.close
|
87
|
+
break
|
88
|
+
end
|
89
|
+
client.puts get_output(a, parser)
|
90
|
+
rescue EOFError
|
12
91
|
client.close
|
13
92
|
break
|
14
93
|
end
|
15
|
-
client.puts parser.parse(a).to_json
|
16
94
|
end
|
17
95
|
end
|
18
|
-
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'socket'
|
3
|
+
|
4
|
+
host = 'localhost'
|
5
|
+
port = 4334
|
6
|
+
|
7
|
+
f = open('10000_names.txt')
|
8
|
+
w = open('output.txt', 'w')
|
9
|
+
s = TCPSocket.open(host, port)
|
10
|
+
|
11
|
+
f.each_with_index do |line, i|
|
12
|
+
puts i if i % 1000 == 0
|
13
|
+
line = line.strip
|
14
|
+
s.puts(line.strip)
|
15
|
+
res = s.gets
|
16
|
+
if res && res.split(" ").size > 3
|
17
|
+
res = res.strip
|
18
|
+
w.write(line + "\n")
|
19
|
+
w.write(res + "\n")
|
20
|
+
w.write("\n")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
s.close
|
25
|
+
|
data/spec/guid/lsid.spec.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
dir = File.dirname("__FILE__")
|
2
2
|
require 'rubygems'
|
3
|
-
require 'spec'
|
4
3
|
require File.expand_path(dir + "../../conf/environment")
|
5
4
|
require File.expand_path(dir + "../../lib/biodiversity/guid")
|
6
5
|
|
7
|
-
describe LsidResolver do
|
6
|
+
describe LsidResolver do
|
8
7
|
it "should return RFD document from lsid" do
|
9
8
|
lsid = "urn:lsid:ubio.org:classificationbank:2232671"
|
10
9
|
LsidResolver.resolve(lsid).class.should == "".class
|
data/spec/parser/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 7
|
8
|
-
- 3
|
9
|
-
version: 0.7.3
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.0
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- Dmitry Mozzherin
|
@@ -14,8 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-
|
18
|
-
default_executable:
|
13
|
+
date: 2011-07-10 00:00:00 Z
|
19
14
|
dependencies:
|
20
15
|
- !ruby/object:Gem::Dependency
|
21
16
|
name: treetop
|
@@ -25,8 +20,6 @@ dependencies:
|
|
25
20
|
requirements:
|
26
21
|
- - ">="
|
27
22
|
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
23
|
version: "0"
|
31
24
|
type: :runtime
|
32
25
|
version_requirements: *id001
|
@@ -38,8 +31,6 @@ dependencies:
|
|
38
31
|
requirements:
|
39
32
|
- - ">="
|
40
33
|
- !ruby/object:Gem::Version
|
41
|
-
segments:
|
42
|
-
- 0
|
43
34
|
version: "0"
|
44
35
|
type: :development
|
45
36
|
version_requirements: *id002
|
@@ -55,7 +46,6 @@ extra_rdoc_files:
|
|
55
46
|
- README.rdoc
|
56
47
|
files:
|
57
48
|
- .document
|
58
|
-
- .gitignore
|
59
49
|
- LICENSE
|
60
50
|
- README.rdoc
|
61
51
|
- Rakefile
|
@@ -63,6 +53,7 @@ files:
|
|
63
53
|
- bin/nnparse
|
64
54
|
- bin/parserver
|
65
55
|
- conf/environment.rb
|
56
|
+
- examples/socket_client.rb
|
66
57
|
- lib/biodiversity.rb
|
67
58
|
- lib/biodiversity/guid.rb
|
68
59
|
- lib/biodiversity/guid/lsid.rb
|
@@ -80,13 +71,12 @@ files:
|
|
80
71
|
- spec/parser/spec_helper.rb
|
81
72
|
- spec/parser/test_data.txt
|
82
73
|
- spec/spec_helper.rb
|
83
|
-
has_rdoc: true
|
84
74
|
homepage: http://github.com/GlobalNamesArchitecture/biodiversity
|
85
75
|
licenses: []
|
86
76
|
|
87
77
|
post_install_message:
|
88
|
-
rdoc_options:
|
89
|
-
|
78
|
+
rdoc_options: []
|
79
|
+
|
90
80
|
require_paths:
|
91
81
|
- lib
|
92
82
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -94,30 +84,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
84
|
requirements:
|
95
85
|
- - ">="
|
96
86
|
- !ruby/object:Gem::Version
|
97
|
-
segments:
|
98
|
-
- 0
|
99
87
|
version: "0"
|
100
88
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
89
|
none: false
|
102
90
|
requirements:
|
103
91
|
- - ">="
|
104
92
|
- !ruby/object:Gem::Version
|
105
|
-
segments:
|
106
|
-
- 0
|
107
93
|
version: "0"
|
108
94
|
requirements: []
|
109
95
|
|
110
96
|
rubyforge_project:
|
111
|
-
rubygems_version: 1.
|
97
|
+
rubygems_version: 1.8.5
|
112
98
|
signing_key:
|
113
99
|
specification_version: 3
|
114
100
|
summary: Parser of scientific names
|
115
|
-
test_files:
|
116
|
-
|
117
|
-
- spec/guid/lsid.spec.rb
|
118
|
-
- spec/parser/scientific_name.spec.rb
|
119
|
-
- spec/parser/scientific_name_canonical.spec.rb
|
120
|
-
- spec/parser/scientific_name_clean.spec.rb
|
121
|
-
- spec/parser/scientific_name_dirty.spec.rb
|
122
|
-
- spec/parser/spec_helper.rb
|
123
|
-
- spec/spec_helper.rb
|
101
|
+
test_files: []
|
102
|
+
|