biodiversity 1.0.10 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +1 -1
- data/.travis.yml +7 -0
- data/CHANGELOG +42 -0
- data/Gemfile +8 -6
- data/Gemfile.lock +33 -33
- data/README.md +167 -0
- data/Rakefile +16 -11
- data/VERSION +1 -1
- data/bin/parserver +33 -44
- data/lib/biodiversity/parser.rb +160 -33
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +4 -2
- data/lib/biodiversity/parser/scientific_name_clean.treetop +479 -277
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +11 -16
- data/spec/parser/scientific_name.spec.rb +63 -7
- data/spec/parser/scientific_name_clean.spec.rb +76 -24
- data/spec/parser/scientific_name_dirty.spec.rb +4 -6
- data/spec/parser/test_data.txt +132 -41
- data/spec/parser/todo.txt +27 -0
- metadata +153 -119
- data/README.rdoc +0 -99
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm use ruby-1.9.
|
1
|
+
rvm use ruby-1.9.3-p392@biodiversity --create
|
data/.travis.yml
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
3.0.0 -- removing support for ruby 1.8.7, making biodiversity gem be the same
|
2
|
+
as biodiversity19, deprecating biodiversity19. A few newly discovered bugs
|
3
|
+
are fixed.
|
4
|
+
|
5
|
+
2.1.0 -- added ScientificNameParser.version method
|
6
|
+
|
7
|
+
2.0.0 -- backward incompatible change in parserver, therefore new major number.
|
8
|
+
In parserver removed option --output=canonical_with_rank, instead added -r
|
9
|
+
option which allows to have canonical with rank with either json or canonical
|
10
|
+
outputs
|
11
|
+
|
12
|
+
1.2.0 -- changed method invocation signature ScientificNameParser.new
|
13
|
+
Now it can take options
|
14
|
+
|
15
|
+
1.1.3 -- added 'fo' as rank
|
16
|
+
|
17
|
+
1.1.2 -- static method for fixing all-caps canonical names, fixing caps
|
18
|
+
for authors words, ampersand instead of 'et' in normalization
|
19
|
+
|
20
|
+
1.1.1 -- more multi-uninomials cases, expanded viruses detection, added
|
21
|
+
abbreviated genera and a few small fixes
|
22
|
+
|
23
|
+
1.1.0 -- added multi-uninomials, fixes in identification annotations (aff.,
|
24
|
+
cf., sp. etc), bug fixes, more robust salvage mode
|
25
|
+
|
26
|
+
1.0.17 -- fixed a bug which prevented all diacritics from being converted correctly
|
27
|
+
|
28
|
+
1.0.16 -- dirty mode now converts ë to e
|
29
|
+
|
30
|
+
1.0.15 -- additional rules added for names ending with ssp. sp sp. and cf.
|
31
|
+
|
32
|
+
1.0.14 -- canonical forms had allowed ë as a character until now. After this
|
33
|
+
version the only utf-8 character allowed in canonical forms should be the
|
34
|
+
multiplication sign for hybrids.
|
35
|
+
|
36
|
+
1.0.13 -- canonical forms for cf. aff. qualifiers are modified: canonical for
|
37
|
+
'Aus cf. bus' is now 'Aus bus'; canonical for 'Aus aff. bus' is now 'Aus'.
|
38
|
+
Ranks at the end of the name like 'var', 'ssp', 'spp' are considered junk and
|
39
|
+
are ignored
|
40
|
+
|
41
|
+
1.0.12 -- bug is fixed which prevented 'Cucurbita pepo' from being parsed correctly,
|
42
|
+
f., forma, fr. are now treated as any other ranks.
|
data/Gemfile
CHANGED
@@ -1,13 +1,15 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
gem
|
4
|
-
gem
|
3
|
+
gem 'rake', '~> 10.0'
|
4
|
+
gem 'treetop', '~> 1.4'
|
5
|
+
gem 'parallel', '~> 0.6'
|
6
|
+
gem 'unicode_utils', '~> 1.4'
|
5
7
|
|
6
8
|
group :development do
|
7
|
-
gem
|
9
|
+
gem 'debugger', '~> 1.5'
|
10
|
+
gem 'jeweler', '~> 1.8'
|
8
11
|
end
|
9
12
|
|
10
13
|
group :test do
|
11
|
-
gem
|
12
|
-
gem "rspec"
|
14
|
+
gem 'rspec', '~> 2.13'
|
13
15
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,47 +1,47 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
columnize (0.3.6)
|
5
|
+
debugger (1.5.0)
|
6
|
+
columnize (>= 0.3.1)
|
7
|
+
debugger-linecache (~> 1.2.0)
|
8
|
+
debugger-ruby_core_source (~> 1.2.0)
|
9
|
+
debugger-linecache (1.2.0)
|
10
|
+
debugger-ruby_core_source (1.2.0)
|
11
|
+
diff-lcs (1.2.1)
|
7
12
|
git (1.2.5)
|
8
|
-
jeweler (1.
|
13
|
+
jeweler (1.8.4)
|
9
14
|
bundler (~> 1.0)
|
10
15
|
git (>= 1.2.5)
|
11
16
|
rake
|
12
|
-
|
13
|
-
|
14
|
-
parallel (0.
|
17
|
+
rdoc
|
18
|
+
json (1.7.7)
|
19
|
+
parallel (0.6.2)
|
15
20
|
polyglot (0.3.3)
|
16
|
-
rake (0.
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
rspec-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
rspec-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
ruby_core_source (>= 0.1.4)
|
29
|
-
ruby-debug19 (0.11.6)
|
30
|
-
columnize (>= 0.3.1)
|
31
|
-
linecache19 (>= 0.5.11)
|
32
|
-
ruby-debug-base19 (>= 0.11.19)
|
33
|
-
ruby_core_source (0.1.5)
|
34
|
-
archive-tar-minitar (>= 0.5.2)
|
35
|
-
treetop (1.4.10)
|
21
|
+
rake (10.0.3)
|
22
|
+
rdoc (4.0.0)
|
23
|
+
json (~> 1.4)
|
24
|
+
rspec (2.13.0)
|
25
|
+
rspec-core (~> 2.13.0)
|
26
|
+
rspec-expectations (~> 2.13.0)
|
27
|
+
rspec-mocks (~> 2.13.0)
|
28
|
+
rspec-core (2.13.1)
|
29
|
+
rspec-expectations (2.13.0)
|
30
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
31
|
+
rspec-mocks (2.13.0)
|
32
|
+
treetop (1.4.12)
|
36
33
|
polyglot
|
37
34
|
polyglot (>= 0.3.1)
|
35
|
+
unicode_utils (1.4.0)
|
38
36
|
|
39
37
|
PLATFORMS
|
40
38
|
ruby
|
41
39
|
|
42
40
|
DEPENDENCIES
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
41
|
+
debugger (~> 1.5)
|
42
|
+
jeweler (~> 1.8)
|
43
|
+
parallel (~> 0.6)
|
44
|
+
rake (~> 10.0)
|
45
|
+
rspec (~> 2.13)
|
46
|
+
treetop (~> 1.4)
|
47
|
+
unicode_utils (~> 1.4)
|
data/README.md
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
Biodiversity
|
2
|
+
============
|
3
|
+
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![CodePolice][5]][6]
|
7
|
+
[![Dependency Status][7]][8]
|
8
|
+
|
9
|
+
Parses taxonomic scientific name and breaks it into semantic elements.
|
10
|
+
|
11
|
+
*WARNING, IMPORTANT!:*
|
12
|
+
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
13
|
+
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
14
|
+
|
15
|
+
biodiversity19 is now deprecated and will be phased out in a couple of years.
|
16
|
+
You are strongly encouraged to change your dependencies from
|
17
|
+
biodiversity19 to biodiversity
|
18
|
+
|
19
|
+
Installation
|
20
|
+
------------
|
21
|
+
|
22
|
+
sudo gem install biodiversity
|
23
|
+
|
24
|
+
Example usage
|
25
|
+
-------------
|
26
|
+
|
27
|
+
### As a command line script
|
28
|
+
|
29
|
+
You can parse file with taxonomic names from command line.
|
30
|
+
File should contain one scientific name per line
|
31
|
+
|
32
|
+
nnparse file_with_names
|
33
|
+
|
34
|
+
The results will be put into the parsed.json file in the current directory.
|
35
|
+
To save results into a different file:
|
36
|
+
|
37
|
+
nnparse file_with_names output_file
|
38
|
+
|
39
|
+
### As a socket server
|
40
|
+
|
41
|
+
If you do not use Ruby and need a fast access to the parser functionality
|
42
|
+
you can use a socket server
|
43
|
+
|
44
|
+
parserver
|
45
|
+
|
46
|
+
parserver -h
|
47
|
+
Usage: parserver [options]
|
48
|
+
|
49
|
+
-r, --canonical_with_rank Adds infraspecies rank to canonical forms
|
50
|
+
|
51
|
+
-o, --output=output Specifies the type of the output:
|
52
|
+
json - parsed results in json
|
53
|
+
canonical - canonical form only
|
54
|
+
Default: json
|
55
|
+
|
56
|
+
-p, --port=port Specifies the port number
|
57
|
+
Default: 4334
|
58
|
+
|
59
|
+
-h, --help Show this help message.
|
60
|
+
|
61
|
+
parserver --output=canonical
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
With default settings you can access parserver via port 4334 using a
|
66
|
+
socket client library of your programming language. You can find
|
67
|
+
[socket client script example][9] in the examples directory of the gem.
|
68
|
+
|
69
|
+
If you want to check if socket server works for you:
|
70
|
+
|
71
|
+
#run server in one terminal
|
72
|
+
parserver
|
73
|
+
|
74
|
+
#in another terminal window type
|
75
|
+
telnet localhost 4334
|
76
|
+
|
77
|
+
If you enter a line with a scientific name -- server will send you back
|
78
|
+
parsed information in json format.
|
79
|
+
|
80
|
+
To stop telnet client type any of `end`,`exit`,`q`, `.` instead
|
81
|
+
of scientific name
|
82
|
+
|
83
|
+
$ telnet localhost 4334
|
84
|
+
Trying ::1...
|
85
|
+
Connected to localhost.
|
86
|
+
Escape character is '^]'.
|
87
|
+
Acacia abyssinica Hochst. ex Benth. ssp. calophylla Brenan
|
88
|
+
{"scientificName":{"canonical":"Acacia abyssinica calophylla"...}}
|
89
|
+
end
|
90
|
+
|
91
|
+
### As a library
|
92
|
+
|
93
|
+
You can use it as a library in Ruby, JRuby etc.
|
94
|
+
|
95
|
+
require 'biodiversity'
|
96
|
+
|
97
|
+
parser = ScientificNameParser.new
|
98
|
+
|
99
|
+
#to find version number
|
100
|
+
ScientificNameParser.version
|
101
|
+
|
102
|
+
# to fix capitalization in canonicals
|
103
|
+
ScientificNameParser.fix_case("QUERCUS (QUERCUS) ALBA")
|
104
|
+
# Output: Quercus (Quercus) alba
|
105
|
+
|
106
|
+
# to parse a scientific name into a ruby hash
|
107
|
+
parser.parse("Plantago major")
|
108
|
+
|
109
|
+
#to get json representation
|
110
|
+
parser.parse("Plantago").to_json
|
111
|
+
#or
|
112
|
+
parser.parse("Plantago")
|
113
|
+
parser.all_json
|
114
|
+
|
115
|
+
# to clean name up
|
116
|
+
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
117
|
+
|
118
|
+
# to get only cleaned up latin part of the name
|
119
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
|
120
|
+
|
121
|
+
# to get detailed information about elements of the name
|
122
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
123
|
+
|
124
|
+
Returned result is not always linear, if name is complex. To get simple linear
|
125
|
+
representation of the name you can use:
|
126
|
+
|
127
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:position]
|
128
|
+
# returns {0=>["genus", 16], 17=>["species", 26],
|
129
|
+
# 28=>["author_word", 32], 33=>["author_word", 40],
|
130
|
+
# 42=>["author_word", 44], 45=>["author_word", 50],
|
131
|
+
# 53=>["author_word", 58], 59=>["year", 63]}
|
132
|
+
# where the key is the char index of the start of
|
133
|
+
# a word, first element of the value is a semantic meaning
|
134
|
+
# of the word, second element of the value is the character index
|
135
|
+
# of end of the word
|
136
|
+
|
137
|
+
To parse using several CPUs (4 seem to be optimal)
|
138
|
+
|
139
|
+
parser = ParallelParser.new
|
140
|
+
# ParallelParser.new(4) will try to run 4 processes if hardware allows
|
141
|
+
array_of_names = ["Betula alba", "Homo sapiens"....]
|
142
|
+
parser.parse(array_of_names)
|
143
|
+
# Output: {"Betula alba" => {:scientificName...}, "Homo sapiens" => {:scientificName...}, ...}
|
144
|
+
|
145
|
+
parallel parser takes list of names and returns back a hash with names as keys and parsed data as values
|
146
|
+
|
147
|
+
To get canonicals with ranks for infraspecific epithets:
|
148
|
+
|
149
|
+
parser = ScientificNameParser.new(canonical_with_rank: true)
|
150
|
+
parser.parse('Cola cordifolia var. puberula A. Chev.')[:scientificName][:canonical]
|
151
|
+
# Output: Cola cordifolia var. puberula
|
152
|
+
|
153
|
+
To resolve lsid and get back RDF file
|
154
|
+
|
155
|
+
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
[1]: https://badge.fury.io/rb/biodiversity19.png
|
160
|
+
[2]: http://badge.fury.io/rb/biodiversity19
|
161
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.png
|
162
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
163
|
+
[5]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity.png
|
164
|
+
[6]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity
|
165
|
+
[7]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity.png
|
166
|
+
[8]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity
|
167
|
+
[9]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/examples/socket_client.rb
|
data/Rakefile
CHANGED
@@ -20,35 +20,41 @@ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
|
|
20
20
|
begin
|
21
21
|
require 'jeweler'
|
22
22
|
Jeweler::Tasks.new do |gem|
|
23
|
-
gem.name =
|
23
|
+
gem.name = 'biodiversity'
|
24
|
+
#To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
|
24
25
|
gem.summary = 'Parser of scientific names'
|
25
26
|
gem.description = 'Tools for biodiversity informatics'
|
26
|
-
gem.email =
|
27
|
-
gem.homepage =
|
28
|
-
gem.authors = [
|
27
|
+
gem.email = 'dmozzherin@gmail.com'
|
28
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
|
29
|
+
gem.authors = ['Dmitry Mozzherin']
|
29
30
|
gem.has_rdoc = false
|
30
31
|
gem.bindir = 'bin'
|
31
32
|
gem.executables = ['nnparse', 'parserver']
|
32
33
|
gem.add_dependency('treetop')
|
33
34
|
gem.add_dependency('parallel')
|
34
|
-
gem.add_dependency('json') if ruby_version < 19
|
35
|
+
# gem.add_dependency('json') if ruby_version < 19
|
35
36
|
gem.add_development_dependency "rspec"
|
36
|
-
# gem is a Gem::Specification...
|
37
|
+
# gem is a Gem::Specification...
|
38
|
+
# see http://www.rubygems.org/read/chapter/20 for additional settings
|
37
39
|
end
|
38
40
|
rescue LoadError
|
39
|
-
puts
|
41
|
+
puts 'Jeweler (or a dependency) not available. ' +
|
42
|
+
'Install it with: sudo gem install jeweler'
|
40
43
|
end
|
41
44
|
|
42
45
|
task :tt do
|
43
|
-
['scientific_name_clean',
|
46
|
+
['scientific_name_clean',
|
47
|
+
'scientific_name_dirty',
|
48
|
+
'scientific_name_canonical'].each do |f|
|
44
49
|
file = "#{dir}/lib/biodiversity/parser/#{f}"
|
45
50
|
FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
|
46
51
|
system("tt #{file}.treetop")
|
47
52
|
rf = "#{file}.rb"
|
48
|
-
rfn = open(rf +
|
53
|
+
rfn = open(rf + '.tmp', 'w')
|
49
54
|
skip_head = false
|
50
55
|
f = open(rf)
|
51
|
-
#getting around a bug in treetop which prevents setting
|
56
|
+
# getting around a bug in treetop which prevents setting
|
57
|
+
# UTF-8 encoding in ruby19
|
52
58
|
f.each_with_index do |l, i|
|
53
59
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
54
60
|
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
@@ -63,4 +69,3 @@ task :tt do
|
|
63
69
|
`mv #{rf}.tmp #{rf}`
|
64
70
|
end
|
65
71
|
end
|
66
|
-
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/bin/parserver
CHANGED
@@ -5,78 +5,67 @@ require 'socket'
|
|
5
5
|
require 'biodiversity' # Get sockets from stdlib
|
6
6
|
|
7
7
|
DEFAULT_PORT = 4334
|
8
|
-
RUBY_VERSION_INT = RUBY_VERSION.split(
|
8
|
+
RUBY_VERSION_INT = RUBY_VERSION.split('.')[0..1].join('').to_i
|
9
9
|
OPTIONS = {
|
10
|
-
:
|
11
|
-
:
|
10
|
+
output: 'json',
|
11
|
+
canonical_with_rank: false,
|
12
|
+
port: DEFAULT_PORT
|
12
13
|
}
|
13
14
|
|
14
15
|
options = {}
|
15
16
|
ARGV.options do |opts|
|
16
17
|
script_name = File.basename($0)
|
17
|
-
opts.banner = "Usage:
|
18
|
+
opts.banner = "Usage: #{script_name} [options]"
|
18
19
|
|
19
|
-
opts.separator
|
20
|
+
opts.separator ''
|
20
21
|
|
21
|
-
opts.on(
|
22
|
-
|
22
|
+
opts.on('-r',
|
23
|
+
'--canonical_with_rank',
|
24
|
+
'Adds infraspecies rank to canonical forms'
|
25
|
+
) { |rank| options[:canonical_with_rank] = rank }
|
26
|
+
|
27
|
+
opts.separator ''
|
28
|
+
|
29
|
+
opts.on('-o', '--output=output', String,
|
30
|
+
'Specifies the type of the output:
|
23
31
|
json - parsed results in json
|
24
|
-
canonical - canonical
|
25
|
-
|
26
|
-
"Default: json") { |output| options[:output] = output }
|
32
|
+
canonical - canonical form only',
|
33
|
+
'Default: json') { |output| options[:output] = output }
|
27
34
|
|
28
|
-
opts.separator
|
35
|
+
opts.separator ''
|
29
36
|
|
30
|
-
opts.on(
|
31
|
-
|
37
|
+
opts.on('-p', '--port=port', String,
|
38
|
+
'Specifies the port number',
|
32
39
|
"Default: #{DEFAULT_PORT}") { |port| options[:port] = port }
|
33
40
|
|
34
|
-
opts.separator
|
41
|
+
opts.separator ''
|
35
42
|
|
36
|
-
opts.on(
|
37
|
-
|
43
|
+
opts.on('-h', '--help',
|
44
|
+
'Show this help message.') { puts opts; exit }
|
38
45
|
|
39
46
|
opts.parse!
|
40
47
|
end
|
41
48
|
|
42
|
-
OPTIONS[:output] = options[:output] if ['canonical'
|
49
|
+
OPTIONS[:output] = options[:output] if ['canonical'].include?(options[:output])
|
43
50
|
OPTIONS[:port] = options[:port].to_i if options[:port].to_i > 0
|
44
|
-
|
45
|
-
def parser_error(name_string)
|
46
|
-
{:scientificName => {:parsed => false, :verbatim => name_string, :error => 'Parser error'}}
|
47
|
-
end
|
51
|
+
OPTIONS[:canonical_with_rank] = !!options[:canonical_with_rank]
|
48
52
|
|
49
53
|
def get_output(name_string, parser)
|
50
54
|
begin
|
51
|
-
if RUBY_VERSION_INT < 19
|
52
|
-
old_kcode = $KCODE
|
53
|
-
$KCODE = 'NONE'
|
54
|
-
end
|
55
55
|
parsed = parser.parse(name_string)
|
56
|
-
if RUBY_VERSION_INT < 19
|
57
|
-
$KCODE = old_kcode
|
58
|
-
end
|
59
56
|
rescue
|
60
|
-
parsed =
|
57
|
+
parsed = ScientificNameParser::FAILED_RESULT.(name_string)
|
61
58
|
end
|
62
59
|
output = OPTIONS[:output]
|
63
60
|
return parsed.to_json if output == 'json'
|
64
|
-
|
65
|
-
return canonical.to_s if output == 'canonical' || canonical == nil || parsed[:scientificName][:hybrid] || !parsed[:scientificName][:parsed]
|
66
|
-
parts = parsed[:scientificName][:canonical].split(" ")
|
67
|
-
|
68
|
-
if parts.size > 2 && parsed[:scientificName][:details][0][:infraspecies]
|
69
|
-
name_ary = parts[0..1]
|
70
|
-
parsed[:scientificName][:details][0][:infraspecies].each do |data|
|
71
|
-
name_ary << (data[:rank] && data[:rank] != 'n/a'? "#{data[:rank]} #{data[:string]}" : data[:string])
|
72
|
-
end
|
73
|
-
canonical = name_ary.join(" ")
|
74
|
-
end
|
75
|
-
canonical
|
61
|
+
parsed[:scientificName][:canonical].to_s
|
76
62
|
end
|
77
63
|
|
78
|
-
puts "Running parser service on port
|
79
|
-
|
64
|
+
puts "Running parser service on port %s, output type is '%s'" %
|
65
|
+
[OPTIONS[:port], OPTIONS[:output]]
|
66
|
+
opts = {}
|
67
|
+
opts = {canonical_with_rank: true} if OPTIONS[:canonical_with_rank]
|
68
|
+
parser = ScientificNameParser.new(opts)
|
80
69
|
server = TCPServer.open(OPTIONS[:port]) # Socket to listen on a port
|
81
70
|
loop do # Servers run forever
|
82
71
|
Thread.start(server.accept) do |client|
|
@@ -85,7 +74,7 @@ loop do # Servers run forever
|
|
85
74
|
while a = client.readline rescue nil
|
86
75
|
count += 1
|
87
76
|
puts "parsed %s'th name" % count if count % 1000 == 0
|
88
|
-
a.force_encoding(
|
77
|
+
a.force_encoding('utf-8') if a && RUBY_VERSION_INT >= 19
|
89
78
|
if ['end','exit','q', '.'].include? a.strip
|
90
79
|
client.close
|
91
80
|
break
|