biodiversity19 0.7.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -67,3 +67,5 @@ You can use it as a library
67
67
  # to resolve lsid and get back RDF file
68
68
  LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
69
69
 
70
+ Copyright (c) 2009-2011 Marine Biological Laboratory. See LICENSE.txt for
71
+ further details.
data/Rakefile CHANGED
@@ -2,14 +2,15 @@ dir = File.dirname(__FILE__)
2
2
  require 'rubygems'
3
3
  require 'rake'
4
4
  #$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
5
- require 'spec/rake/spectask'
5
+ require 'rspec/core'
6
+ require 'rspec/core/rake_task'
6
7
 
7
8
  #Gem::manage_gems
8
9
  #require 'rake/gempackagetask'
9
10
 
10
11
  task :default => :spec
11
12
 
12
- Spec::Rake::SpecTask.new do |t|
13
+ RSpec::Core::RakeTask.new do |t|
13
14
  t.pattern = 'spec/**/*spec.rb'
14
15
  end
15
16
 
@@ -50,7 +51,7 @@ task :tt do
50
51
  f.each_with_index do |l, i|
51
52
  skip_head = l.match(/^# Autogenerated/) if i == 0
52
53
  if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
53
- next
54
+ next
54
55
  else
55
56
  skip_head = false
56
57
  rfn.write(l)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.3
1
+ 1.0.0
data/bin/parserver CHANGED
@@ -1,18 +1,95 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'rubygems'
3
+ require 'optparse'
3
4
  require 'socket'
4
5
  require 'biodiversity' # Get sockets from stdlib
5
- puts "Running parser service on port 4334"
6
+
7
+ DEFAULT_PORT = 4334
8
+ RUBY_VERSION_INT = RUBY_VERSION.split(".")[0..1].join('').to_i
9
+ OPTIONS = {
10
+ :output => "json",
11
+ :port => DEFAULT_PORT
12
+ }
13
+
14
+ ARGV.options do |opts|
15
+ script_name = File.basename($0)
16
+ opts.banner = "Usage: ruby #{script_name} [options]"
17
+
18
+ opts.separator ""
19
+
20
+ opts.on("-o", "--output=output", String,
21
+ "Specifies the type of the output:
22
+ json - parsed results in json
23
+ canonical - canonical version
24
+ canonical_with_rank - canonical with rank",
25
+ "Default: json") { |OPTIONS[:output]| }
26
+
27
+ opts.separator ""
28
+
29
+ opts.on("-p", "--port=port", String,
30
+ "Specifies the port number",
31
+ "Default: #{DEFAULT_PORT}") { |OPTIONS[:port]| }
32
+
33
+ opts.separator ""
34
+
35
+ opts.on("-h", "--help",
36
+ "Show this help message.") { puts opts; exit }
37
+
38
+ opts.parse!
39
+ end
40
+
41
+ OPTIONS[:output] = "json" unless ['canonical', 'canonical_with_rank'].include?(OPTIONS[:output])
42
+ OPTIONS[:port] = (OPTIONS[:port].to_i > 0 ? OPTIONS[:port].to_i : DEFAULT_PORT)
43
+
44
+ def parser_error(name_string)
45
+ {:scientificName => {:parsed => false, :verbatim => name_string, :error => 'Parser error'}}
46
+ end
47
+
48
+ def get_output(name_string, parser)
49
+ begin
50
+ if RUBY_VERSION_INT < 19
51
+ old_kcode = $KCODE
52
+ $KCODE = 'NONE'
53
+ end
54
+ parsed = parser.parse(name_string)
55
+ if RUBY_VERSION_INT < 19
56
+ $KCODE = old_kcode
57
+ end
58
+ rescue
59
+ parsed = parser_error(name_string)
60
+ end
61
+ output = OPTIONS[:output]
62
+ return parsed.to_json if output == 'json'
63
+ canonical = parsed[:scientificName][:canonical]
64
+ return canonical.to_s if output == 'canonical' || canonical == nil || parsed[:scientificName][:hybrid] || !parsed[:scientificName][:parsed]
65
+ parts = parsed[:scientificName][:canonical].split(" ")
66
+
67
+ if parts.size > 2 && parsed[:scientificName][:details][0][:infraspecies]
68
+ name_ary = parts[0..1]
69
+ parsed[:scientificName][:details][0][:infraspecies].each do |data|
70
+ name_ary << (data[:rank] && data[:rank] != 'n/a'? "#{data[:rank]} #{data[:string]}" : data[:string])
71
+ end
72
+ canonical = name_ary.join(" ")
73
+ end
74
+ canonical
75
+ end
76
+
77
+ puts "Running parser service on port #{OPTIONS[:port]}, output type is '#{OPTIONS[:output]}'"
6
78
  parser = ScientificNameParser.new
7
- server = TCPServer.open(4334) # Socket to listen on port 4334
79
+ server = TCPServer.open(OPTIONS[:port]) # Socket to listen on a port
8
80
  loop do # Servers run forever
9
81
  client = server.accept # Wait for a client to connect
10
- while a = client.readline
11
- if ['end','exit','q', '.'].include? a.strip
82
+ while true
83
+ begin
84
+ a = client.readline
85
+ if ['end','exit','q', '.'].include? a.strip
86
+ client.close
87
+ break
88
+ end
89
+ client.puts get_output(a, parser)
90
+ rescue EOFError
12
91
  client.close
13
92
  break
14
93
  end
15
- client.puts parser.parse(a).to_json
16
94
  end
17
95
  end
18
-
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+ require 'socket'
3
+
4
+ host = 'localhost'
5
+ port = 4334
6
+
7
+ f = open('10000_names.txt')
8
+ w = open('output.txt', 'w')
9
+ s = TCPSocket.open(host, port)
10
+
11
+ f.each_with_index do |line, i|
12
+ puts i if i % 1000 == 0
13
+ line = line.strip
14
+ s.puts(line.strip)
15
+ res = s.gets
16
+ if res && res.split(" ").size > 3
17
+ res = res.strip
18
+ w.write(line + "\n")
19
+ w.write(res + "\n")
20
+ w.write("\n")
21
+ end
22
+ end
23
+
24
+ s.close
25
+
@@ -1,10 +1,9 @@
1
1
  dir = File.dirname("__FILE__")
2
2
  require 'rubygems'
3
- require 'spec'
4
3
  require File.expand_path(dir + "../../conf/environment")
5
4
  require File.expand_path(dir + "../../lib/biodiversity/guid")
6
5
 
7
- describe LsidResolver do
6
+ describe LsidResolver do
8
7
  it "should return RFD document from lsid" do
9
8
  lsid = "urn:lsid:ubio.org:classificationbank:2232671"
10
9
  LsidResolver.resolve(lsid).class.should == "".class
@@ -1,6 +1,5 @@
1
1
  dir = File.dirname("__FILE__")
2
2
  require 'rubygems'
3
- require 'spec'
4
3
  require 'yaml'
5
4
  require 'treetop'
6
5
  require 'json'
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity19
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 7
8
- - 3
9
- version: 0.7.3
4
+ prerelease:
5
+ version: 1.0.0
10
6
  platform: ruby
11
7
  authors:
12
8
  - Dmitry Mozzherin
@@ -14,8 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-02-07 00:00:00 -05:00
18
- default_executable:
13
+ date: 2011-07-10 00:00:00 Z
19
14
  dependencies:
20
15
  - !ruby/object:Gem::Dependency
21
16
  name: treetop
@@ -25,8 +20,6 @@ dependencies:
25
20
  requirements:
26
21
  - - ">="
27
22
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
23
  version: "0"
31
24
  type: :runtime
32
25
  version_requirements: *id001
@@ -38,8 +31,6 @@ dependencies:
38
31
  requirements:
39
32
  - - ">="
40
33
  - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
34
  version: "0"
44
35
  type: :development
45
36
  version_requirements: *id002
@@ -55,7 +46,6 @@ extra_rdoc_files:
55
46
  - README.rdoc
56
47
  files:
57
48
  - .document
58
- - .gitignore
59
49
  - LICENSE
60
50
  - README.rdoc
61
51
  - Rakefile
@@ -63,6 +53,7 @@ files:
63
53
  - bin/nnparse
64
54
  - bin/parserver
65
55
  - conf/environment.rb
56
+ - examples/socket_client.rb
66
57
  - lib/biodiversity.rb
67
58
  - lib/biodiversity/guid.rb
68
59
  - lib/biodiversity/guid/lsid.rb
@@ -80,13 +71,12 @@ files:
80
71
  - spec/parser/spec_helper.rb
81
72
  - spec/parser/test_data.txt
82
73
  - spec/spec_helper.rb
83
- has_rdoc: true
84
74
  homepage: http://github.com/GlobalNamesArchitecture/biodiversity
85
75
  licenses: []
86
76
 
87
77
  post_install_message:
88
- rdoc_options:
89
- - --charset=UTF-8
78
+ rdoc_options: []
79
+
90
80
  require_paths:
91
81
  - lib
92
82
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -94,30 +84,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
84
  requirements:
95
85
  - - ">="
96
86
  - !ruby/object:Gem::Version
97
- segments:
98
- - 0
99
87
  version: "0"
100
88
  required_rubygems_version: !ruby/object:Gem::Requirement
101
89
  none: false
102
90
  requirements:
103
91
  - - ">="
104
92
  - !ruby/object:Gem::Version
105
- segments:
106
- - 0
107
93
  version: "0"
108
94
  requirements: []
109
95
 
110
96
  rubyforge_project:
111
- rubygems_version: 1.3.7
97
+ rubygems_version: 1.8.5
112
98
  signing_key:
113
99
  specification_version: 3
114
100
  summary: Parser of scientific names
115
- test_files:
116
- - spec/biodiversity_spec.rb
117
- - spec/guid/lsid.spec.rb
118
- - spec/parser/scientific_name.spec.rb
119
- - spec/parser/scientific_name_canonical.spec.rb
120
- - spec/parser/scientific_name_clean.spec.rb
121
- - spec/parser/scientific_name_dirty.spec.rb
122
- - spec/parser/spec_helper.rb
123
- - spec/spec_helper.rb
101
+ test_files: []
102
+
data/.gitignore DELETED
@@ -1,16 +0,0 @@
1
- lib/biodiversity/parser/*rb
2
- *.gemspec
3
- *.sw?
4
- .DS_Store
5
- coverage
6
- rdoc
7
- pkg
8
- *.swp
9
- *.swo
10
- biodiversity*.gem
11
- *json
12
- *xml
13
- tmp
14
- .DS_Store
15
- spec/parser/test_data_new.txt
16
- t