biodiversity 1.0.10 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +1 -1
- data/.travis.yml +7 -0
- data/CHANGELOG +42 -0
- data/Gemfile +8 -6
- data/Gemfile.lock +33 -33
- data/README.md +167 -0
- data/Rakefile +16 -11
- data/VERSION +1 -1
- data/bin/parserver +33 -44
- data/lib/biodiversity/parser.rb +160 -33
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +4 -2
- data/lib/biodiversity/parser/scientific_name_clean.treetop +479 -277
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +11 -16
- data/spec/parser/scientific_name.spec.rb +63 -7
- data/spec/parser/scientific_name_clean.spec.rb +76 -24
- data/spec/parser/scientific_name_dirty.spec.rb +4 -6
- data/spec/parser/test_data.txt +132 -41
- data/spec/parser/todo.txt +27 -0
- metadata +153 -119
- data/README.rdoc +0 -99
data/spec/parser/todo.txt
CHANGED
@@ -26,3 +26,30 @@ Polypodium lineare C.Chr. f. caudatoattenuatum Takeda Polypodium
|
|
26
26
|
|
27
27
|
Rosa gallica × tomentosa var. eglandulosa R.Keller Rosa gallica
|
28
28
|
Rosa gallica × afzeliana subsp. vosagiaca forma subcomplicata R.Keller Rosa gallica
|
29
|
+
|
30
|
+
Salmonella sp. (ser.) brancaster Macdonald, Sivell, Emms and Taylor 1948
|
31
|
+
|
32
|
+
Stanhopea tigrina Bateman ex Lindl. x S. ecornuta Lem.
|
33
|
+
|
34
|
+
|
35
|
+
Junellia o'donelli Moldenke, 1946
|
36
|
+
|
37
|
+
119052670 | Human rhinovirus A11 |
|
38
|
+
|
39
|
+
Rubus idaeus monst. obtusifolius (Willd.) Focke | Rubus idaeus
|
40
|
+
|
41
|
+
Kerana var. cameroni |Kerana var
|
42
|
+
|
43
|
+
A. alba
|
44
|
+
A.alba
|
45
|
+
|
46
|
+
Bangalaia viridis sbsp. distinctemaculata Lepesme & Breuning, 1956
|
47
|
+
|
48
|
+
Clostridium sp. enrichment culture clone M4C16
|
49
|
+
|
50
|
+
|
51
|
+
000913765 Rubus x gracilidens Sudre
|
52
|
+
Rubus gracilidens | 1
|
53
|
+
|
54
|
+
|
55
|
+
Salix myrtilloides x starkeana ssp. starkeana
|
metadata
CHANGED
@@ -1,137 +1,178 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 10
|
10
|
-
version: 1.0.10
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Dmitry Mozzherin
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
none: false
|
23
|
-
requirements:
|
24
|
-
- -
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
|
27
|
-
|
28
|
-
- 0
|
29
|
-
version: "0"
|
30
|
-
requirement: *id001
|
12
|
+
date: 2013-05-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '10.0'
|
22
|
+
type: :runtime
|
31
23
|
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '10.0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
32
31
|
name: treetop
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '1.4'
|
33
38
|
type: :runtime
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
36
|
-
none: false
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
hash: 3
|
41
|
-
segments:
|
42
|
-
- 0
|
43
|
-
version: "0"
|
44
|
-
requirement: *id002
|
45
39
|
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '1.4'
|
46
|
+
- !ruby/object:Gem::Dependency
|
46
47
|
name: parallel
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0.6'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.6'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: unicode_utils
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.4'
|
47
70
|
type: :runtime
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
version_requirements: &id003 !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
hash: 3
|
55
|
-
segments:
|
56
|
-
- 0
|
57
|
-
version: "0"
|
58
|
-
requirement: *id003
|
59
71
|
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '1.4'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: debugger
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '1.5'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.5'
|
94
|
+
- !ruby/object:Gem::Dependency
|
60
95
|
name: jeweler
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '1.8'
|
61
102
|
type: :development
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
version_requirements: &id004 !ruby/object:Gem::Requirement
|
64
|
-
none: false
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
hash: 3
|
69
|
-
segments:
|
70
|
-
- 0
|
71
|
-
version: "0"
|
72
|
-
requirement: *id004
|
73
103
|
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.8'
|
110
|
+
- !ruby/object:Gem::Dependency
|
74
111
|
name: treetop
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
75
118
|
type: :runtime
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
version_requirements: &id005 !ruby/object:Gem::Requirement
|
78
|
-
none: false
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
hash: 3
|
83
|
-
segments:
|
84
|
-
- 0
|
85
|
-
version: "0"
|
86
|
-
requirement: *id005
|
87
119
|
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
88
127
|
name: parallel
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
89
134
|
type: :runtime
|
90
|
-
- !ruby/object:Gem::Dependency
|
91
|
-
version_requirements: &id006 !ruby/object:Gem::Requirement
|
92
|
-
none: false
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
hash: 3
|
97
|
-
segments:
|
98
|
-
- 0
|
99
|
-
version: "0"
|
100
|
-
requirement: *id006
|
101
|
-
prerelease: false
|
102
|
-
name: json
|
103
|
-
type: :runtime
|
104
|
-
- !ruby/object:Gem::Dependency
|
105
|
-
version_requirements: &id007 !ruby/object:Gem::Requirement
|
106
|
-
none: false
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
hash: 3
|
111
|
-
segments:
|
112
|
-
- 0
|
113
|
-
version: "0"
|
114
|
-
requirement: *id007
|
115
135
|
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
- !ruby/object:Gem::Dependency
|
116
143
|
name: rspec
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0'
|
117
150
|
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
118
158
|
description: Tools for biodiversity informatics
|
119
159
|
email: dmozzherin@gmail.com
|
120
|
-
executables:
|
160
|
+
executables:
|
121
161
|
- nnparse
|
122
162
|
- parserver
|
123
163
|
extensions: []
|
124
|
-
|
125
|
-
extra_rdoc_files:
|
164
|
+
extra_rdoc_files:
|
126
165
|
- LICENSE
|
127
|
-
- README.
|
128
|
-
files:
|
166
|
+
- README.md
|
167
|
+
files:
|
129
168
|
- .document
|
130
169
|
- .rvmrc
|
170
|
+
- .travis.yml
|
171
|
+
- CHANGELOG
|
131
172
|
- Gemfile
|
132
173
|
- Gemfile.lock
|
133
174
|
- LICENSE
|
134
|
-
- README.
|
175
|
+
- README.md
|
135
176
|
- Rakefile
|
136
177
|
- VERSION
|
137
178
|
- bin/nnparse
|
@@ -158,36 +199,29 @@ files:
|
|
158
199
|
- spec/spec_helper.rb
|
159
200
|
homepage: http://github.com/GlobalNamesArchitecture/biodiversity
|
160
201
|
licenses: []
|
161
|
-
|
162
202
|
post_install_message:
|
163
203
|
rdoc_options: []
|
164
|
-
|
165
|
-
require_paths:
|
204
|
+
require_paths:
|
166
205
|
- lib
|
167
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
206
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
168
207
|
none: false
|
169
|
-
requirements:
|
170
|
-
- -
|
171
|
-
- !ruby/object:Gem::Version
|
172
|
-
|
173
|
-
segments:
|
208
|
+
requirements:
|
209
|
+
- - ! '>='
|
210
|
+
- !ruby/object:Gem::Version
|
211
|
+
version: '0'
|
212
|
+
segments:
|
174
213
|
- 0
|
175
|
-
|
176
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
214
|
+
hash: -3993295929745438801
|
215
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
177
216
|
none: false
|
178
|
-
requirements:
|
179
|
-
- -
|
180
|
-
- !ruby/object:Gem::Version
|
181
|
-
|
182
|
-
segments:
|
183
|
-
- 0
|
184
|
-
version: "0"
|
217
|
+
requirements:
|
218
|
+
- - ! '>='
|
219
|
+
- !ruby/object:Gem::Version
|
220
|
+
version: '0'
|
185
221
|
requirements: []
|
186
|
-
|
187
222
|
rubyforge_project:
|
188
|
-
rubygems_version: 1.8.
|
223
|
+
rubygems_version: 1.8.25
|
189
224
|
signing_key:
|
190
225
|
specification_version: 3
|
191
226
|
summary: Parser of scientific names
|
192
227
|
test_files: []
|
193
|
-
|
data/README.rdoc
DELETED
@@ -1,99 +0,0 @@
|
|
1
|
-
= Biodiversity
|
2
|
-
|
3
|
-
Parses taxonomic scientific name and breaks it into semantic elements.
|
4
|
-
|
5
|
-
== Installation
|
6
|
-
|
7
|
-
To install gem you need RubyGems >= 1.3.6
|
8
|
-
|
9
|
-
$ sudo gem install biodiversity #for ruby 1.8.x
|
10
|
-
$ sudo gem install biodiversity19 #for ruby 1.9.x
|
11
|
-
|
12
|
-
== Example usage
|
13
|
-
|
14
|
-
=== As a command line script
|
15
|
-
|
16
|
-
You can parse file with taxonomic names from command line. File should contain one scientific name per line
|
17
|
-
|
18
|
-
nnparser file_with_names
|
19
|
-
|
20
|
-
=== As a socket server
|
21
|
-
|
22
|
-
If you do not use ruby and need a fast access to the parser functionality you can use a socket server
|
23
|
-
|
24
|
-
parserver
|
25
|
-
|
26
|
-
options:
|
27
|
-
|
28
|
-
parserver --output=canonical
|
29
|
-
|
30
|
-
to return a canonical form of the name string
|
31
|
-
|
32
|
-
parserver --output=canonical_with_rank
|
33
|
-
|
34
|
-
the same as above, but infraspecies' rank is shown if available
|
35
|
-
|
36
|
-
parserver --port 5555
|
37
|
-
|
38
|
-
run socket server on a different port
|
39
|
-
|
40
|
-
Then you can access it via 4334 port using a socket client library of your programming language. You can find socket client script example in the examples directory of the gem.
|
41
|
-
|
42
|
-
If you want to check if socket server works for you:
|
43
|
-
|
44
|
-
#run server in one terminal
|
45
|
-
parserver
|
46
|
-
|
47
|
-
#in another terminal window type
|
48
|
-
telnet localhost 4334
|
49
|
-
|
50
|
-
If you enter a line with a scientific name server will send you back parsed information in json format.
|
51
|
-
|
52
|
-
To stop telnet client type any of 'end','exit','q', '.' (without quotes) instead of scientific name
|
53
|
-
|
54
|
-
$ telnet localhost 4334
|
55
|
-
Trying ::1...
|
56
|
-
Connected to localhost.
|
57
|
-
Escape character is '^]'.
|
58
|
-
Acacia abyssinica Hochst. ex Benth. ssp. calophylla Brenan
|
59
|
-
{"scientificName":{"canonical":"Acacia abyssinica calophylla","parsed":true,"parser_run":1,"verbatim":"Acacia abyssinica Hochst. ex Benth. ssp. calophylla Brenan\r\n","positions":{"0":["genus",6],"18":["author_word",25],"29":["author_word",35],"7":["species",17],"41":["infraspecies",51],"52":["author_word",58]},"hybrid":false,"normalized":"Acacia abyssinica Hochst. ex Benth. ssp. calophylla Brenan","details":[{"species":{"basionymAuthorTeam":{"exAuthorTeam":{"author":["Benth."],"authorTeam":"Benth."},"author":["Hochst."],"authorTeam":"Hochst."},"string":"abyssinica","authorship":"Hochst. ex Benth."},"infraspecies":[{"basionymAuthorTeam":{"author":["Brenan"],"authorTeam":"Brenan"},"string":"calophylla","rank":"ssp.","authorship":"Brenan"}],"genus":{"string":"Acacia"}}]}}
|
60
|
-
|
61
|
-
|
62
|
-
=== As a library
|
63
|
-
|
64
|
-
You can use it as a library
|
65
|
-
|
66
|
-
require 'biodiversity'
|
67
|
-
|
68
|
-
parser = ScientificNameParser.new
|
69
|
-
|
70
|
-
# to parse a scientific name into a ruby hash
|
71
|
-
parser.parse("Plantago major")
|
72
|
-
|
73
|
-
#to get json representation
|
74
|
-
parser.parse("Plantago").to_json
|
75
|
-
#or
|
76
|
-
parser.parse("Plantago")
|
77
|
-
parser.all_json
|
78
|
-
|
79
|
-
# to clean name up
|
80
|
-
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
81
|
-
|
82
|
-
# to get only cleaned up latin part of the name
|
83
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
|
84
|
-
|
85
|
-
# to get detailed information about elements of the name
|
86
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
87
|
-
|
88
|
-
# to parse using several CPUs (4 seem to be optimal)
|
89
|
-
parser = ParallelParser.new # ParallelParser.new(4) will try to run 4 processes if hardware allows
|
90
|
-
array_of_names = ["Betula alba", "Homo sapiens"....]
|
91
|
-
parser.parse(array_of_names) # -> {"Betula alba" => {:scientificName...}, "Homo sapiens" => {:scientificName...}, ...}
|
92
|
-
|
93
|
-
parallel parser takes list of names and returns back a hash with names as keys and parsed data as values
|
94
|
-
|
95
|
-
# to resolve lsid and get back RDF file
|
96
|
-
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
97
|
-
|
98
|
-
Copyright (c) 2009-2011 Marine Biological Laboratory. See LICENSE.txt for
|
99
|
-
further details.
|