biodiversity 0.5.14 → 0.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/biodiversity.gemspec +2 -5
- data/lib/biodiversity/parser/scientific_name_canonical.rb +3 -3
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +4 -4
- data/lib/biodiversity/parser/scientific_name_clean.rb +58 -58
- data/lib/biodiversity/parser/scientific_name_clean.treetop +25 -25
- data/lib/biodiversity/parser/scientific_name_dirty.rb +9 -9
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +5 -5
- data/spec/parser/scientific_name_canonical.spec.rb +6 -6
- data/spec/parser/scientific_name_clean.spec.rb +75 -75
- data/spec/parser/scientific_name_dirty.spec.rb +14 -14
- data/spec/parser/test_data.txt +148 -148
- metadata +5 -17
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.5.
|
|
1
|
+
0.5.15
|
data/biodiversity.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{biodiversity}
|
|
8
|
-
s.version = "0.5.
|
|
8
|
+
s.version = "0.5.15"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
|
12
|
-
s.date = %q{2010-03-
|
|
12
|
+
s.date = %q{2010-03-24}
|
|
13
13
|
s.default_executable = %q{nnparse}
|
|
14
14
|
s.description = %q{Tools for biodiversity informatics}
|
|
15
15
|
s.email = %q{dmozzherin@gmail.com}
|
|
@@ -72,16 +72,13 @@ Gem::Specification.new do |s|
|
|
|
72
72
|
|
|
73
73
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
74
74
|
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
|
75
|
-
s.add_runtime_dependency(%q<json>, [">= 0"])
|
|
76
75
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
|
77
76
|
else
|
|
78
77
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
79
|
-
s.add_dependency(%q<json>, [">= 0"])
|
|
80
78
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
81
79
|
end
|
|
82
80
|
else
|
|
83
81
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
84
|
-
s.add_dependency(%q<json>, [">= 0"])
|
|
85
82
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
86
83
|
end
|
|
87
84
|
end
|
|
@@ -191,7 +191,7 @@ module ScientificNameCanonical
|
|
|
191
191
|
r3 = _nt_space
|
|
192
192
|
s1 << r3
|
|
193
193
|
if r3
|
|
194
|
-
r4 =
|
|
194
|
+
r4 = _nt_infragenus
|
|
195
195
|
s1 << r4
|
|
196
196
|
if r4
|
|
197
197
|
r5 = _nt_space
|
|
@@ -225,7 +225,7 @@ module ScientificNameCanonical
|
|
|
225
225
|
r10 = _nt_space
|
|
226
226
|
s8 << r10
|
|
227
227
|
if r10
|
|
228
|
-
r11 =
|
|
228
|
+
r11 = _nt_infragenus
|
|
229
229
|
s8 << r11
|
|
230
230
|
if r11
|
|
231
231
|
r12 = _nt_garbage
|
|
@@ -318,7 +318,7 @@ module ScientificNameCanonical
|
|
|
318
318
|
end
|
|
319
319
|
|
|
320
320
|
i0, s0 = index, []
|
|
321
|
-
r1 =
|
|
321
|
+
r1 = _nt_uninomial_string
|
|
322
322
|
s0 << r1
|
|
323
323
|
if r1
|
|
324
324
|
r2 = _nt_garbage
|
|
@@ -27,7 +27,7 @@ grammar ScientificNameCanonical
|
|
|
27
27
|
|
|
28
28
|
rule multinomial_with_garbage
|
|
29
29
|
|
|
30
|
-
a:genus space b:
|
|
30
|
+
a:genus space b:infragenus space c:species garbage {
|
|
31
31
|
def value
|
|
32
32
|
a.value + " " + b.value + " " + c.value
|
|
33
33
|
end
|
|
@@ -45,7 +45,7 @@ grammar ScientificNameCanonical
|
|
|
45
45
|
end
|
|
46
46
|
}
|
|
47
47
|
/
|
|
48
|
-
a:genus space b:
|
|
48
|
+
a:genus space b:infragenus garbage {
|
|
49
49
|
def value
|
|
50
50
|
a.value + " " + b.value
|
|
51
51
|
end
|
|
@@ -83,7 +83,7 @@ grammar ScientificNameCanonical
|
|
|
83
83
|
end
|
|
84
84
|
|
|
85
85
|
rule uninomial_with_garbage
|
|
86
|
-
a:
|
|
86
|
+
a:uninomial_string b:garbage {
|
|
87
87
|
def value
|
|
88
88
|
a.value
|
|
89
89
|
end
|
|
@@ -108,4 +108,4 @@ grammar ScientificNameCanonical
|
|
|
108
108
|
space_hard [^ш]+
|
|
109
109
|
end
|
|
110
110
|
|
|
111
|
-
end
|
|
111
|
+
end
|
|
@@ -866,7 +866,7 @@ module ScientificNameClean
|
|
|
866
866
|
r3 = _nt_space
|
|
867
867
|
s1 << r3
|
|
868
868
|
if r3
|
|
869
|
-
r4 =
|
|
869
|
+
r4 = _nt_infragenus
|
|
870
870
|
s1 << r4
|
|
871
871
|
if r4
|
|
872
872
|
r5 = _nt_space
|
|
@@ -917,7 +917,7 @@ module ScientificNameClean
|
|
|
917
917
|
r14 = _nt_space
|
|
918
918
|
s12 << r14
|
|
919
919
|
if r14
|
|
920
|
-
r15 =
|
|
920
|
+
r15 = _nt_infragenus
|
|
921
921
|
s12 << r15
|
|
922
922
|
if r15
|
|
923
923
|
r16 = _nt_space
|
|
@@ -1171,7 +1171,7 @@ module ScientificNameClean
|
|
|
1171
1171
|
|
|
1172
1172
|
i0 = index
|
|
1173
1173
|
i1, s1 = index, []
|
|
1174
|
-
r2 =
|
|
1174
|
+
r2 = _nt_infraspecies_string
|
|
1175
1175
|
s1 << r2
|
|
1176
1176
|
if r2
|
|
1177
1177
|
r3 = _nt_space
|
|
@@ -1192,7 +1192,7 @@ module ScientificNameClean
|
|
|
1192
1192
|
if r1
|
|
1193
1193
|
r0 = r1
|
|
1194
1194
|
else
|
|
1195
|
-
r5 =
|
|
1195
|
+
r5 = _nt_infraspecies_string
|
|
1196
1196
|
if r5
|
|
1197
1197
|
r0 = r5
|
|
1198
1198
|
else
|
|
@@ -1206,7 +1206,7 @@ module ScientificNameClean
|
|
|
1206
1206
|
r0
|
|
1207
1207
|
end
|
|
1208
1208
|
|
|
1209
|
-
module
|
|
1209
|
+
module Infraspeciesstring0
|
|
1210
1210
|
def sel
|
|
1211
1211
|
elements[0]
|
|
1212
1212
|
end
|
|
@@ -1220,7 +1220,7 @@ module ScientificNameClean
|
|
|
1220
1220
|
end
|
|
1221
1221
|
end
|
|
1222
1222
|
|
|
1223
|
-
module
|
|
1223
|
+
module Infraspeciesstring1
|
|
1224
1224
|
def value
|
|
1225
1225
|
sel.apply(a)
|
|
1226
1226
|
end
|
|
@@ -1237,14 +1237,14 @@ module ScientificNameClean
|
|
|
1237
1237
|
end
|
|
1238
1238
|
end
|
|
1239
1239
|
|
|
1240
|
-
module
|
|
1240
|
+
module Infraspeciesstring2
|
|
1241
1241
|
def species_word
|
|
1242
1242
|
elements[0]
|
|
1243
1243
|
end
|
|
1244
1244
|
|
|
1245
1245
|
end
|
|
1246
1246
|
|
|
1247
|
-
module
|
|
1247
|
+
module Infraspeciesstring3
|
|
1248
1248
|
def value
|
|
1249
1249
|
text_value
|
|
1250
1250
|
end
|
|
@@ -1258,14 +1258,14 @@ module ScientificNameClean
|
|
|
1258
1258
|
end
|
|
1259
1259
|
|
|
1260
1260
|
def details
|
|
1261
|
-
{:infraspecies => {:
|
|
1261
|
+
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
|
1262
1262
|
end
|
|
1263
1263
|
end
|
|
1264
1264
|
|
|
1265
|
-
def
|
|
1265
|
+
def _nt_infraspecies_string
|
|
1266
1266
|
start_index = index
|
|
1267
|
-
if node_cache[:
|
|
1268
|
-
cached = node_cache[:
|
|
1267
|
+
if node_cache[:infraspecies_string].has_key?(index)
|
|
1268
|
+
cached = node_cache[:infraspecies_string][index]
|
|
1269
1269
|
@index = cached.interval.end if cached
|
|
1270
1270
|
return cached
|
|
1271
1271
|
end
|
|
@@ -1284,8 +1284,8 @@ module ScientificNameClean
|
|
|
1284
1284
|
end
|
|
1285
1285
|
if s1.last
|
|
1286
1286
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
1287
|
-
r1.extend(
|
|
1288
|
-
r1.extend(
|
|
1287
|
+
r1.extend(Infraspeciesstring0)
|
|
1288
|
+
r1.extend(Infraspeciesstring1)
|
|
1289
1289
|
else
|
|
1290
1290
|
@index = i1
|
|
1291
1291
|
r1 = nil
|
|
@@ -1314,8 +1314,8 @@ module ScientificNameClean
|
|
|
1314
1314
|
end
|
|
1315
1315
|
if s5.last
|
|
1316
1316
|
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
|
1317
|
-
r5.extend(
|
|
1318
|
-
r5.extend(
|
|
1317
|
+
r5.extend(Infraspeciesstring2)
|
|
1318
|
+
r5.extend(Infraspeciesstring3)
|
|
1319
1319
|
else
|
|
1320
1320
|
@index = i5
|
|
1321
1321
|
r5 = nil
|
|
@@ -1328,7 +1328,7 @@ module ScientificNameClean
|
|
|
1328
1328
|
end
|
|
1329
1329
|
end
|
|
1330
1330
|
|
|
1331
|
-
node_cache[:
|
|
1331
|
+
node_cache[:infraspecies_string][start_index] = r0
|
|
1332
1332
|
|
|
1333
1333
|
r0
|
|
1334
1334
|
end
|
|
@@ -1400,7 +1400,7 @@ module ScientificNameClean
|
|
|
1400
1400
|
end
|
|
1401
1401
|
|
|
1402
1402
|
def details(a = nil)
|
|
1403
|
-
{:infraspecies => {:
|
|
1403
|
+
{:infraspecies => {:string => (a.value rescue nil), :rank => text_value}}
|
|
1404
1404
|
end
|
|
1405
1405
|
end
|
|
1406
1406
|
|
|
@@ -1864,7 +1864,7 @@ module ScientificNameClean
|
|
|
1864
1864
|
" " + a.value
|
|
1865
1865
|
end
|
|
1866
1866
|
def details(a = nil)
|
|
1867
|
-
{:infraspecies => {:
|
|
1867
|
+
{:infraspecies => {:string => (a.value rescue nil), :rank => value}}
|
|
1868
1868
|
end
|
|
1869
1869
|
end
|
|
1870
1870
|
|
|
@@ -1979,7 +1979,7 @@ module ScientificNameClean
|
|
|
1979
1979
|
|
|
1980
1980
|
i0 = index
|
|
1981
1981
|
i1, s1 = index, []
|
|
1982
|
-
r2 =
|
|
1982
|
+
r2 = _nt_species_string
|
|
1983
1983
|
s1 << r2
|
|
1984
1984
|
if r2
|
|
1985
1985
|
r3 = _nt_space
|
|
@@ -2000,7 +2000,7 @@ module ScientificNameClean
|
|
|
2000
2000
|
if r1
|
|
2001
2001
|
r0 = r1
|
|
2002
2002
|
else
|
|
2003
|
-
r5 =
|
|
2003
|
+
r5 = _nt_species_string
|
|
2004
2004
|
if r5
|
|
2005
2005
|
r0 = r5
|
|
2006
2006
|
else
|
|
@@ -2014,7 +2014,7 @@ module ScientificNameClean
|
|
|
2014
2014
|
r0
|
|
2015
2015
|
end
|
|
2016
2016
|
|
|
2017
|
-
module
|
|
2017
|
+
module Speciesstring0
|
|
2018
2018
|
def space_hard
|
|
2019
2019
|
elements[0]
|
|
2020
2020
|
end
|
|
@@ -2028,14 +2028,14 @@ module ScientificNameClean
|
|
|
2028
2028
|
end
|
|
2029
2029
|
end
|
|
2030
2030
|
|
|
2031
|
-
module
|
|
2031
|
+
module Speciesstring1
|
|
2032
2032
|
def a
|
|
2033
2033
|
elements[0]
|
|
2034
2034
|
end
|
|
2035
2035
|
|
|
2036
2036
|
end
|
|
2037
2037
|
|
|
2038
|
-
module
|
|
2038
|
+
module Speciesstring2
|
|
2039
2039
|
def value
|
|
2040
2040
|
a.value
|
|
2041
2041
|
end
|
|
@@ -2053,11 +2053,11 @@ module ScientificNameClean
|
|
|
2053
2053
|
end
|
|
2054
2054
|
|
|
2055
2055
|
def details
|
|
2056
|
-
{:species => {:
|
|
2056
|
+
{:species => {:string => a.value}}
|
|
2057
2057
|
end
|
|
2058
2058
|
end
|
|
2059
2059
|
|
|
2060
|
-
module
|
|
2060
|
+
module Speciesstring3
|
|
2061
2061
|
def canonical
|
|
2062
2062
|
value
|
|
2063
2063
|
end
|
|
@@ -2071,14 +2071,14 @@ module ScientificNameClean
|
|
|
2071
2071
|
end
|
|
2072
2072
|
|
|
2073
2073
|
def details
|
|
2074
|
-
{:species => {:
|
|
2074
|
+
{:species => {:string => value}}
|
|
2075
2075
|
end
|
|
2076
2076
|
end
|
|
2077
2077
|
|
|
2078
|
-
def
|
|
2078
|
+
def _nt_species_string
|
|
2079
2079
|
start_index = index
|
|
2080
|
-
if node_cache[:
|
|
2081
|
-
cached = node_cache[:
|
|
2080
|
+
if node_cache[:species_string].has_key?(index)
|
|
2081
|
+
cached = node_cache[:species_string][index]
|
|
2082
2082
|
@index = cached.interval.end if cached
|
|
2083
2083
|
return cached
|
|
2084
2084
|
end
|
|
@@ -2102,7 +2102,7 @@ module ScientificNameClean
|
|
|
2102
2102
|
end
|
|
2103
2103
|
if s4.last
|
|
2104
2104
|
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
2105
|
-
r4.extend(
|
|
2105
|
+
r4.extend(Speciesstring0)
|
|
2106
2106
|
else
|
|
2107
2107
|
@index = i4
|
|
2108
2108
|
r4 = nil
|
|
@@ -2117,8 +2117,8 @@ module ScientificNameClean
|
|
|
2117
2117
|
end
|
|
2118
2118
|
if s1.last
|
|
2119
2119
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
2120
|
-
r1.extend(
|
|
2121
|
-
r1.extend(
|
|
2120
|
+
r1.extend(Speciesstring1)
|
|
2121
|
+
r1.extend(Speciesstring2)
|
|
2122
2122
|
else
|
|
2123
2123
|
@index = i1
|
|
2124
2124
|
r1 = nil
|
|
@@ -2127,7 +2127,7 @@ module ScientificNameClean
|
|
|
2127
2127
|
r0 = r1
|
|
2128
2128
|
else
|
|
2129
2129
|
r8 = _nt_species_word
|
|
2130
|
-
r8.extend(
|
|
2130
|
+
r8.extend(Speciesstring3)
|
|
2131
2131
|
if r8
|
|
2132
2132
|
r0 = r8
|
|
2133
2133
|
else
|
|
@@ -2141,12 +2141,12 @@ module ScientificNameClean
|
|
|
2141
2141
|
end
|
|
2142
2142
|
end
|
|
2143
2143
|
|
|
2144
|
-
node_cache[:
|
|
2144
|
+
node_cache[:species_string][start_index] = r0
|
|
2145
2145
|
|
|
2146
2146
|
r0
|
|
2147
2147
|
end
|
|
2148
2148
|
|
|
2149
|
-
module
|
|
2149
|
+
module Infragenus0
|
|
2150
2150
|
def left_paren
|
|
2151
2151
|
elements[0]
|
|
2152
2152
|
end
|
|
@@ -2168,7 +2168,7 @@ module ScientificNameClean
|
|
|
2168
2168
|
end
|
|
2169
2169
|
end
|
|
2170
2170
|
|
|
2171
|
-
module
|
|
2171
|
+
module Infragenus1
|
|
2172
2172
|
def value
|
|
2173
2173
|
"(" + a.value + ")"
|
|
2174
2174
|
end
|
|
@@ -2178,18 +2178,18 @@ module ScientificNameClean
|
|
|
2178
2178
|
end
|
|
2179
2179
|
|
|
2180
2180
|
def pos
|
|
2181
|
-
{a.interval.begin => ['
|
|
2181
|
+
{a.interval.begin => ['infragenus', a.interval.end]}
|
|
2182
2182
|
end
|
|
2183
2183
|
|
|
2184
2184
|
def details
|
|
2185
|
-
{:
|
|
2185
|
+
{:infragenus => {:string => a.value}}
|
|
2186
2186
|
end
|
|
2187
2187
|
end
|
|
2188
2188
|
|
|
2189
|
-
def
|
|
2189
|
+
def _nt_infragenus
|
|
2190
2190
|
start_index = index
|
|
2191
|
-
if node_cache[:
|
|
2192
|
-
cached = node_cache[:
|
|
2191
|
+
if node_cache[:infragenus].has_key?(index)
|
|
2192
|
+
cached = node_cache[:infragenus][index]
|
|
2193
2193
|
@index = cached.interval.end if cached
|
|
2194
2194
|
return cached
|
|
2195
2195
|
end
|
|
@@ -2215,14 +2215,14 @@ module ScientificNameClean
|
|
|
2215
2215
|
end
|
|
2216
2216
|
if s0.last
|
|
2217
2217
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
2218
|
-
r0.extend(
|
|
2219
|
-
r0.extend(
|
|
2218
|
+
r0.extend(Infragenus0)
|
|
2219
|
+
r0.extend(Infragenus1)
|
|
2220
2220
|
else
|
|
2221
2221
|
@index = i0
|
|
2222
2222
|
r0 = nil
|
|
2223
2223
|
end
|
|
2224
2224
|
|
|
2225
|
-
node_cache[:
|
|
2225
|
+
node_cache[:infragenus][start_index] = r0
|
|
2226
2226
|
|
|
2227
2227
|
r0
|
|
2228
2228
|
end
|
|
@@ -2266,7 +2266,7 @@ module ScientificNameClean
|
|
|
2266
2266
|
end
|
|
2267
2267
|
|
|
2268
2268
|
def details
|
|
2269
|
-
{:genus => {:
|
|
2269
|
+
{:genus => {:string => a.value}}
|
|
2270
2270
|
end
|
|
2271
2271
|
end
|
|
2272
2272
|
|
|
@@ -2373,7 +2373,7 @@ module ScientificNameClean
|
|
|
2373
2373
|
|
|
2374
2374
|
i0 = index
|
|
2375
2375
|
i1, s1 = index, []
|
|
2376
|
-
r2 =
|
|
2376
|
+
r2 = _nt_uninomial_string
|
|
2377
2377
|
s1 << r2
|
|
2378
2378
|
if r2
|
|
2379
2379
|
r3 = _nt_space_hard
|
|
@@ -2394,7 +2394,7 @@ module ScientificNameClean
|
|
|
2394
2394
|
if r1
|
|
2395
2395
|
r0 = r1
|
|
2396
2396
|
else
|
|
2397
|
-
r5 =
|
|
2397
|
+
r5 = _nt_uninomial_string
|
|
2398
2398
|
if r5
|
|
2399
2399
|
r0 = r5
|
|
2400
2400
|
else
|
|
@@ -2408,7 +2408,7 @@ module ScientificNameClean
|
|
|
2408
2408
|
r0
|
|
2409
2409
|
end
|
|
2410
2410
|
|
|
2411
|
-
module
|
|
2411
|
+
module Uninomialstring0
|
|
2412
2412
|
def canonical
|
|
2413
2413
|
value
|
|
2414
2414
|
end
|
|
@@ -2422,22 +2422,22 @@ module ScientificNameClean
|
|
|
2422
2422
|
end
|
|
2423
2423
|
|
|
2424
2424
|
def details
|
|
2425
|
-
{:uninomial => {:
|
|
2425
|
+
{:uninomial => {:string => value}}
|
|
2426
2426
|
end
|
|
2427
2427
|
end
|
|
2428
2428
|
|
|
2429
|
-
def
|
|
2429
|
+
def _nt_uninomial_string
|
|
2430
2430
|
start_index = index
|
|
2431
|
-
if node_cache[:
|
|
2432
|
-
cached = node_cache[:
|
|
2431
|
+
if node_cache[:uninomial_string].has_key?(index)
|
|
2432
|
+
cached = node_cache[:uninomial_string][index]
|
|
2433
2433
|
@index = cached.interval.end if cached
|
|
2434
2434
|
return cached
|
|
2435
2435
|
end
|
|
2436
2436
|
|
|
2437
2437
|
r0 = _nt_cap_latin_word
|
|
2438
|
-
r0.extend(
|
|
2438
|
+
r0.extend(Uninomialstring0)
|
|
2439
2439
|
|
|
2440
|
-
node_cache[:
|
|
2440
|
+
node_cache[:uninomial_string][start_index] = r0
|
|
2441
2441
|
|
|
2442
2442
|
r0
|
|
2443
2443
|
end
|
|
@@ -4861,7 +4861,7 @@ module ScientificNameClean
|
|
|
4861
4861
|
end
|
|
4862
4862
|
|
|
4863
4863
|
def details
|
|
4864
|
-
{:species => {:
|
|
4864
|
+
{:species => {:string => b.value}}
|
|
4865
4865
|
end
|
|
4866
4866
|
end
|
|
4867
4867
|
|
|
@@ -4897,7 +4897,7 @@ module ScientificNameClean
|
|
|
4897
4897
|
end
|
|
4898
4898
|
|
|
4899
4899
|
def details
|
|
4900
|
-
{:species => {:
|
|
4900
|
+
{:species => {:string => b.value}}
|
|
4901
4901
|
end
|
|
4902
4902
|
end
|
|
4903
4903
|
|
|
@@ -4933,7 +4933,7 @@ module ScientificNameClean
|
|
|
4933
4933
|
end
|
|
4934
4934
|
|
|
4935
4935
|
def details
|
|
4936
|
-
{:species => {:
|
|
4936
|
+
{:species => {:string => b.value}}
|
|
4937
4937
|
end
|
|
4938
4938
|
end
|
|
4939
4939
|
|