biodiversity 0.7.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -67,3 +67,5 @@ You can use it as a library
67
67
  # to resolve lsid and get back RDF file
68
68
  LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
69
69
 
70
+ Copyright (c) 2009-2011 Marine Biological Laboratory. See LICENSE.txt for
71
+ further details.
data/Rakefile CHANGED
@@ -2,14 +2,15 @@ dir = File.dirname(__FILE__)
2
2
  require 'rubygems'
3
3
  require 'rake'
4
4
  #$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
5
- require 'spec/rake/spectask'
5
+ require 'rspec/core'
6
+ require 'rspec/core/rake_task'
6
7
 
7
8
  #Gem::manage_gems
8
9
  #require 'rake/gempackagetask'
9
10
 
10
11
  task :default => :spec
11
12
 
12
- Spec::Rake::SpecTask.new do |t|
13
+ RSpec::Core::RakeTask.new do |t|
13
14
  t.pattern = 'spec/**/*spec.rb'
14
15
  end
15
16
 
@@ -50,7 +51,7 @@ task :tt do
50
51
  f.each_with_index do |l, i|
51
52
  skip_head = l.match(/^# Autogenerated/) if i == 0
52
53
  if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
53
- next
54
+ next
54
55
  else
55
56
  skip_head = false
56
57
  rfn.write(l)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.3
1
+ 1.0.0
data/bin/parserver CHANGED
@@ -1,18 +1,95 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'rubygems'
3
+ require 'optparse'
3
4
  require 'socket'
4
5
  require 'biodiversity' # Get sockets from stdlib
5
- puts "Running parser service on port 4334"
6
+
7
+ DEFAULT_PORT = 4334
8
+ RUBY_VERSION_INT = RUBY_VERSION.split(".")[0..1].join('').to_i
9
+ OPTIONS = {
10
+ :output => "json",
11
+ :port => DEFAULT_PORT
12
+ }
13
+
14
+ ARGV.options do |opts|
15
+ script_name = File.basename($0)
16
+ opts.banner = "Usage: ruby #{script_name} [options]"
17
+
18
+ opts.separator ""
19
+
20
+ opts.on("-o", "--output=output", String,
21
+ "Specifies the type of the output:
22
+ json - parsed results in json
23
+ canonical - canonical version
24
+ canonical_with_rank - canonical with rank",
25
+ "Default: json") { |OPTIONS[:output]| }
26
+
27
+ opts.separator ""
28
+
29
+ opts.on("-p", "--port=port", String,
30
+ "Specifies the port number",
31
+ "Default: #{DEFAULT_PORT}") { |OPTIONS[:port]| }
32
+
33
+ opts.separator ""
34
+
35
+ opts.on("-h", "--help",
36
+ "Show this help message.") { puts opts; exit }
37
+
38
+ opts.parse!
39
+ end
40
+
41
+ OPTIONS[:output] = "json" unless ['canonical', 'canonical_with_rank'].include?(OPTIONS[:output])
42
+ OPTIONS[:port] = (OPTIONS[:port].to_i > 0 ? OPTIONS[:port].to_i : DEFAULT_PORT)
43
+
44
+ def parser_error(name_string)
45
+ {:scientificName => {:parsed => false, :verbatim => name_string, :error => 'Parser error'}}
46
+ end
47
+
48
+ def get_output(name_string, parser)
49
+ begin
50
+ if RUBY_VERSION_INT < 19
51
+ old_kcode = $KCODE
52
+ $KCODE = 'NONE'
53
+ end
54
+ parsed = parser.parse(name_string)
55
+ if RUBY_VERSION_INT < 19
56
+ $KCODE = old_kcode
57
+ end
58
+ rescue
59
+ parsed = parser_error(name_string)
60
+ end
61
+ output = OPTIONS[:output]
62
+ return parsed.to_json if output == 'json'
63
+ canonical = parsed[:scientificName][:canonical]
64
+ return canonical.to_s if output == 'canonical' || canonical == nil || parsed[:scientificName][:hybrid] || !parsed[:scientificName][:parsed]
65
+ parts = parsed[:scientificName][:canonical].split(" ")
66
+
67
+ if parts.size > 2 && parsed[:scientificName][:details][0][:infraspecies]
68
+ name_ary = parts[0..1]
69
+ parsed[:scientificName][:details][0][:infraspecies].each do |data|
70
+ name_ary << (data[:rank] && data[:rank] != 'n/a'? "#{data[:rank]} #{data[:string]}" : data[:string])
71
+ end
72
+ canonical = name_ary.join(" ")
73
+ end
74
+ canonical
75
+ end
76
+
77
+ puts "Running parser service on port #{OPTIONS[:port]}, output type is '#{OPTIONS[:output]}'"
6
78
  parser = ScientificNameParser.new
7
- server = TCPServer.open(4334) # Socket to listen on port 4334
79
+ server = TCPServer.open(OPTIONS[:port]) # Socket to listen on a port
8
80
  loop do # Servers run forever
9
81
  client = server.accept # Wait for a client to connect
10
- while a = client.readline
11
- if ['end','exit','q', '.'].include? a.strip
82
+ while true
83
+ begin
84
+ a = client.readline
85
+ if ['end','exit','q', '.'].include? a.strip
86
+ client.close
87
+ break
88
+ end
89
+ client.puts get_output(a, parser)
90
+ rescue EOFError
12
91
  client.close
13
92
  break
14
93
  end
15
- client.puts parser.parse(a).to_json
16
94
  end
17
95
  end
18
-
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+ require 'socket'
3
+
4
+ host = 'localhost'
5
+ port = 4334
6
+
7
+ f = open('10000_names.txt')
8
+ w = open('output.txt', 'w')
9
+ s = TCPSocket.open(host, port)
10
+
11
+ f.each_with_index do |line, i|
12
+ puts i if i % 1000 == 0
13
+ line = line.strip
14
+ s.puts(line.strip)
15
+ res = s.gets
16
+ if res && res.split(" ").size > 3
17
+ res = res.strip
18
+ w.write(line + "\n")
19
+ w.write(res + "\n")
20
+ w.write("\n")
21
+ end
22
+ end
23
+
24
+ s.close
25
+
@@ -1,10 +1,9 @@
1
1
  dir = File.dirname("__FILE__")
2
2
  require 'rubygems'
3
- require 'spec'
4
3
  require File.expand_path(dir + "../../conf/environment")
5
4
  require File.expand_path(dir + "../../lib/biodiversity/guid")
6
5
 
7
- describe LsidResolver do
6
+ describe LsidResolver do
8
7
  it "should return RFD document from lsid" do
9
8
  lsid = "urn:lsid:ubio.org:classificationbank:2232671"
10
9
  LsidResolver.resolve(lsid).class.should == "".class
@@ -1,6 +1,5 @@
1
1
  dir = File.dirname("__FILE__")
2
2
  require 'rubygems'
3
- require 'spec'
4
3
  require 'yaml'
5
4
  require 'treetop'
6
5
  require 'json'
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
+ - 1
7
8
  - 0
8
- - 7
9
- - 3
10
- version: 0.7.3
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Dmitry Mozzherin
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-02-07 00:00:00 -05:00
18
+ date: 2011-07-09 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -79,6 +79,7 @@ files:
79
79
  - bin/nnparse
80
80
  - bin/parserver
81
81
  - conf/environment.rb
82
+ - examples/socket_client.rb
82
83
  - lib/biodiversity.rb
83
84
  - lib/biodiversity/guid.rb
84
85
  - lib/biodiversity/guid/lsid.rb
@@ -130,12 +131,5 @@ rubygems_version: 1.3.7
130
131
  signing_key:
131
132
  specification_version: 3
132
133
  summary: Parser of scientific names
133
- test_files:
134
- - spec/biodiversity_spec.rb
135
- - spec/guid/lsid.spec.rb
136
- - spec/parser/scientific_name.spec.rb
137
- - spec/parser/scientific_name_canonical.spec.rb
138
- - spec/parser/scientific_name_clean.spec.rb
139
- - spec/parser/scientific_name_dirty.spec.rb
140
- - spec/parser/spec_helper.rb
141
- - spec/spec_helper.rb
134
+ test_files: []
135
+