biodiversity 0.5.14 → 0.5.15
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/biodiversity.gemspec +2 -5
- data/lib/biodiversity/parser/scientific_name_canonical.rb +3 -3
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +4 -4
- data/lib/biodiversity/parser/scientific_name_clean.rb +58 -58
- data/lib/biodiversity/parser/scientific_name_clean.treetop +25 -25
- data/lib/biodiversity/parser/scientific_name_dirty.rb +9 -9
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +5 -5
- data/spec/parser/scientific_name_canonical.spec.rb +6 -6
- data/spec/parser/scientific_name_clean.spec.rb +75 -75
- data/spec/parser/scientific_name_dirty.spec.rb +14 -14
- data/spec/parser/test_data.txt +148 -148
- metadata +5 -17
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.
|
1
|
+
0.5.15
|
data/biodiversity.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{biodiversity}
|
8
|
-
s.version = "0.5.
|
8
|
+
s.version = "0.5.15"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
12
|
-
s.date = %q{2010-03-
|
12
|
+
s.date = %q{2010-03-24}
|
13
13
|
s.default_executable = %q{nnparse}
|
14
14
|
s.description = %q{Tools for biodiversity informatics}
|
15
15
|
s.email = %q{dmozzherin@gmail.com}
|
@@ -72,16 +72,13 @@ Gem::Specification.new do |s|
|
|
72
72
|
|
73
73
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
74
74
|
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
75
|
-
s.add_runtime_dependency(%q<json>, [">= 0"])
|
76
75
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
77
76
|
else
|
78
77
|
s.add_dependency(%q<treetop>, [">= 0"])
|
79
|
-
s.add_dependency(%q<json>, [">= 0"])
|
80
78
|
s.add_dependency(%q<rspec>, [">= 0"])
|
81
79
|
end
|
82
80
|
else
|
83
81
|
s.add_dependency(%q<treetop>, [">= 0"])
|
84
|
-
s.add_dependency(%q<json>, [">= 0"])
|
85
82
|
s.add_dependency(%q<rspec>, [">= 0"])
|
86
83
|
end
|
87
84
|
end
|
@@ -191,7 +191,7 @@ module ScientificNameCanonical
|
|
191
191
|
r3 = _nt_space
|
192
192
|
s1 << r3
|
193
193
|
if r3
|
194
|
-
r4 =
|
194
|
+
r4 = _nt_infragenus
|
195
195
|
s1 << r4
|
196
196
|
if r4
|
197
197
|
r5 = _nt_space
|
@@ -225,7 +225,7 @@ module ScientificNameCanonical
|
|
225
225
|
r10 = _nt_space
|
226
226
|
s8 << r10
|
227
227
|
if r10
|
228
|
-
r11 =
|
228
|
+
r11 = _nt_infragenus
|
229
229
|
s8 << r11
|
230
230
|
if r11
|
231
231
|
r12 = _nt_garbage
|
@@ -318,7 +318,7 @@ module ScientificNameCanonical
|
|
318
318
|
end
|
319
319
|
|
320
320
|
i0, s0 = index, []
|
321
|
-
r1 =
|
321
|
+
r1 = _nt_uninomial_string
|
322
322
|
s0 << r1
|
323
323
|
if r1
|
324
324
|
r2 = _nt_garbage
|
@@ -27,7 +27,7 @@ grammar ScientificNameCanonical
|
|
27
27
|
|
28
28
|
rule multinomial_with_garbage
|
29
29
|
|
30
|
-
a:genus space b:
|
30
|
+
a:genus space b:infragenus space c:species garbage {
|
31
31
|
def value
|
32
32
|
a.value + " " + b.value + " " + c.value
|
33
33
|
end
|
@@ -45,7 +45,7 @@ grammar ScientificNameCanonical
|
|
45
45
|
end
|
46
46
|
}
|
47
47
|
/
|
48
|
-
a:genus space b:
|
48
|
+
a:genus space b:infragenus garbage {
|
49
49
|
def value
|
50
50
|
a.value + " " + b.value
|
51
51
|
end
|
@@ -83,7 +83,7 @@ grammar ScientificNameCanonical
|
|
83
83
|
end
|
84
84
|
|
85
85
|
rule uninomial_with_garbage
|
86
|
-
a:
|
86
|
+
a:uninomial_string b:garbage {
|
87
87
|
def value
|
88
88
|
a.value
|
89
89
|
end
|
@@ -108,4 +108,4 @@ grammar ScientificNameCanonical
|
|
108
108
|
space_hard [^ш]+
|
109
109
|
end
|
110
110
|
|
111
|
-
end
|
111
|
+
end
|
@@ -866,7 +866,7 @@ module ScientificNameClean
|
|
866
866
|
r3 = _nt_space
|
867
867
|
s1 << r3
|
868
868
|
if r3
|
869
|
-
r4 =
|
869
|
+
r4 = _nt_infragenus
|
870
870
|
s1 << r4
|
871
871
|
if r4
|
872
872
|
r5 = _nt_space
|
@@ -917,7 +917,7 @@ module ScientificNameClean
|
|
917
917
|
r14 = _nt_space
|
918
918
|
s12 << r14
|
919
919
|
if r14
|
920
|
-
r15 =
|
920
|
+
r15 = _nt_infragenus
|
921
921
|
s12 << r15
|
922
922
|
if r15
|
923
923
|
r16 = _nt_space
|
@@ -1171,7 +1171,7 @@ module ScientificNameClean
|
|
1171
1171
|
|
1172
1172
|
i0 = index
|
1173
1173
|
i1, s1 = index, []
|
1174
|
-
r2 =
|
1174
|
+
r2 = _nt_infraspecies_string
|
1175
1175
|
s1 << r2
|
1176
1176
|
if r2
|
1177
1177
|
r3 = _nt_space
|
@@ -1192,7 +1192,7 @@ module ScientificNameClean
|
|
1192
1192
|
if r1
|
1193
1193
|
r0 = r1
|
1194
1194
|
else
|
1195
|
-
r5 =
|
1195
|
+
r5 = _nt_infraspecies_string
|
1196
1196
|
if r5
|
1197
1197
|
r0 = r5
|
1198
1198
|
else
|
@@ -1206,7 +1206,7 @@ module ScientificNameClean
|
|
1206
1206
|
r0
|
1207
1207
|
end
|
1208
1208
|
|
1209
|
-
module
|
1209
|
+
module Infraspeciesstring0
|
1210
1210
|
def sel
|
1211
1211
|
elements[0]
|
1212
1212
|
end
|
@@ -1220,7 +1220,7 @@ module ScientificNameClean
|
|
1220
1220
|
end
|
1221
1221
|
end
|
1222
1222
|
|
1223
|
-
module
|
1223
|
+
module Infraspeciesstring1
|
1224
1224
|
def value
|
1225
1225
|
sel.apply(a)
|
1226
1226
|
end
|
@@ -1237,14 +1237,14 @@ module ScientificNameClean
|
|
1237
1237
|
end
|
1238
1238
|
end
|
1239
1239
|
|
1240
|
-
module
|
1240
|
+
module Infraspeciesstring2
|
1241
1241
|
def species_word
|
1242
1242
|
elements[0]
|
1243
1243
|
end
|
1244
1244
|
|
1245
1245
|
end
|
1246
1246
|
|
1247
|
-
module
|
1247
|
+
module Infraspeciesstring3
|
1248
1248
|
def value
|
1249
1249
|
text_value
|
1250
1250
|
end
|
@@ -1258,14 +1258,14 @@ module ScientificNameClean
|
|
1258
1258
|
end
|
1259
1259
|
|
1260
1260
|
def details
|
1261
|
-
{:infraspecies => {:
|
1261
|
+
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
1262
1262
|
end
|
1263
1263
|
end
|
1264
1264
|
|
1265
|
-
def
|
1265
|
+
def _nt_infraspecies_string
|
1266
1266
|
start_index = index
|
1267
|
-
if node_cache[:
|
1268
|
-
cached = node_cache[:
|
1267
|
+
if node_cache[:infraspecies_string].has_key?(index)
|
1268
|
+
cached = node_cache[:infraspecies_string][index]
|
1269
1269
|
@index = cached.interval.end if cached
|
1270
1270
|
return cached
|
1271
1271
|
end
|
@@ -1284,8 +1284,8 @@ module ScientificNameClean
|
|
1284
1284
|
end
|
1285
1285
|
if s1.last
|
1286
1286
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
1287
|
-
r1.extend(
|
1288
|
-
r1.extend(
|
1287
|
+
r1.extend(Infraspeciesstring0)
|
1288
|
+
r1.extend(Infraspeciesstring1)
|
1289
1289
|
else
|
1290
1290
|
@index = i1
|
1291
1291
|
r1 = nil
|
@@ -1314,8 +1314,8 @@ module ScientificNameClean
|
|
1314
1314
|
end
|
1315
1315
|
if s5.last
|
1316
1316
|
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
1317
|
-
r5.extend(
|
1318
|
-
r5.extend(
|
1317
|
+
r5.extend(Infraspeciesstring2)
|
1318
|
+
r5.extend(Infraspeciesstring3)
|
1319
1319
|
else
|
1320
1320
|
@index = i5
|
1321
1321
|
r5 = nil
|
@@ -1328,7 +1328,7 @@ module ScientificNameClean
|
|
1328
1328
|
end
|
1329
1329
|
end
|
1330
1330
|
|
1331
|
-
node_cache[:
|
1331
|
+
node_cache[:infraspecies_string][start_index] = r0
|
1332
1332
|
|
1333
1333
|
r0
|
1334
1334
|
end
|
@@ -1400,7 +1400,7 @@ module ScientificNameClean
|
|
1400
1400
|
end
|
1401
1401
|
|
1402
1402
|
def details(a = nil)
|
1403
|
-
{:infraspecies => {:
|
1403
|
+
{:infraspecies => {:string => (a.value rescue nil), :rank => text_value}}
|
1404
1404
|
end
|
1405
1405
|
end
|
1406
1406
|
|
@@ -1864,7 +1864,7 @@ module ScientificNameClean
|
|
1864
1864
|
" " + a.value
|
1865
1865
|
end
|
1866
1866
|
def details(a = nil)
|
1867
|
-
{:infraspecies => {:
|
1867
|
+
{:infraspecies => {:string => (a.value rescue nil), :rank => value}}
|
1868
1868
|
end
|
1869
1869
|
end
|
1870
1870
|
|
@@ -1979,7 +1979,7 @@ module ScientificNameClean
|
|
1979
1979
|
|
1980
1980
|
i0 = index
|
1981
1981
|
i1, s1 = index, []
|
1982
|
-
r2 =
|
1982
|
+
r2 = _nt_species_string
|
1983
1983
|
s1 << r2
|
1984
1984
|
if r2
|
1985
1985
|
r3 = _nt_space
|
@@ -2000,7 +2000,7 @@ module ScientificNameClean
|
|
2000
2000
|
if r1
|
2001
2001
|
r0 = r1
|
2002
2002
|
else
|
2003
|
-
r5 =
|
2003
|
+
r5 = _nt_species_string
|
2004
2004
|
if r5
|
2005
2005
|
r0 = r5
|
2006
2006
|
else
|
@@ -2014,7 +2014,7 @@ module ScientificNameClean
|
|
2014
2014
|
r0
|
2015
2015
|
end
|
2016
2016
|
|
2017
|
-
module
|
2017
|
+
module Speciesstring0
|
2018
2018
|
def space_hard
|
2019
2019
|
elements[0]
|
2020
2020
|
end
|
@@ -2028,14 +2028,14 @@ module ScientificNameClean
|
|
2028
2028
|
end
|
2029
2029
|
end
|
2030
2030
|
|
2031
|
-
module
|
2031
|
+
module Speciesstring1
|
2032
2032
|
def a
|
2033
2033
|
elements[0]
|
2034
2034
|
end
|
2035
2035
|
|
2036
2036
|
end
|
2037
2037
|
|
2038
|
-
module
|
2038
|
+
module Speciesstring2
|
2039
2039
|
def value
|
2040
2040
|
a.value
|
2041
2041
|
end
|
@@ -2053,11 +2053,11 @@ module ScientificNameClean
|
|
2053
2053
|
end
|
2054
2054
|
|
2055
2055
|
def details
|
2056
|
-
{:species => {:
|
2056
|
+
{:species => {:string => a.value}}
|
2057
2057
|
end
|
2058
2058
|
end
|
2059
2059
|
|
2060
|
-
module
|
2060
|
+
module Speciesstring3
|
2061
2061
|
def canonical
|
2062
2062
|
value
|
2063
2063
|
end
|
@@ -2071,14 +2071,14 @@ module ScientificNameClean
|
|
2071
2071
|
end
|
2072
2072
|
|
2073
2073
|
def details
|
2074
|
-
{:species => {:
|
2074
|
+
{:species => {:string => value}}
|
2075
2075
|
end
|
2076
2076
|
end
|
2077
2077
|
|
2078
|
-
def
|
2078
|
+
def _nt_species_string
|
2079
2079
|
start_index = index
|
2080
|
-
if node_cache[:
|
2081
|
-
cached = node_cache[:
|
2080
|
+
if node_cache[:species_string].has_key?(index)
|
2081
|
+
cached = node_cache[:species_string][index]
|
2082
2082
|
@index = cached.interval.end if cached
|
2083
2083
|
return cached
|
2084
2084
|
end
|
@@ -2102,7 +2102,7 @@ module ScientificNameClean
|
|
2102
2102
|
end
|
2103
2103
|
if s4.last
|
2104
2104
|
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
2105
|
-
r4.extend(
|
2105
|
+
r4.extend(Speciesstring0)
|
2106
2106
|
else
|
2107
2107
|
@index = i4
|
2108
2108
|
r4 = nil
|
@@ -2117,8 +2117,8 @@ module ScientificNameClean
|
|
2117
2117
|
end
|
2118
2118
|
if s1.last
|
2119
2119
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
2120
|
-
r1.extend(
|
2121
|
-
r1.extend(
|
2120
|
+
r1.extend(Speciesstring1)
|
2121
|
+
r1.extend(Speciesstring2)
|
2122
2122
|
else
|
2123
2123
|
@index = i1
|
2124
2124
|
r1 = nil
|
@@ -2127,7 +2127,7 @@ module ScientificNameClean
|
|
2127
2127
|
r0 = r1
|
2128
2128
|
else
|
2129
2129
|
r8 = _nt_species_word
|
2130
|
-
r8.extend(
|
2130
|
+
r8.extend(Speciesstring3)
|
2131
2131
|
if r8
|
2132
2132
|
r0 = r8
|
2133
2133
|
else
|
@@ -2141,12 +2141,12 @@ module ScientificNameClean
|
|
2141
2141
|
end
|
2142
2142
|
end
|
2143
2143
|
|
2144
|
-
node_cache[:
|
2144
|
+
node_cache[:species_string][start_index] = r0
|
2145
2145
|
|
2146
2146
|
r0
|
2147
2147
|
end
|
2148
2148
|
|
2149
|
-
module
|
2149
|
+
module Infragenus0
|
2150
2150
|
def left_paren
|
2151
2151
|
elements[0]
|
2152
2152
|
end
|
@@ -2168,7 +2168,7 @@ module ScientificNameClean
|
|
2168
2168
|
end
|
2169
2169
|
end
|
2170
2170
|
|
2171
|
-
module
|
2171
|
+
module Infragenus1
|
2172
2172
|
def value
|
2173
2173
|
"(" + a.value + ")"
|
2174
2174
|
end
|
@@ -2178,18 +2178,18 @@ module ScientificNameClean
|
|
2178
2178
|
end
|
2179
2179
|
|
2180
2180
|
def pos
|
2181
|
-
{a.interval.begin => ['
|
2181
|
+
{a.interval.begin => ['infragenus', a.interval.end]}
|
2182
2182
|
end
|
2183
2183
|
|
2184
2184
|
def details
|
2185
|
-
{:
|
2185
|
+
{:infragenus => {:string => a.value}}
|
2186
2186
|
end
|
2187
2187
|
end
|
2188
2188
|
|
2189
|
-
def
|
2189
|
+
def _nt_infragenus
|
2190
2190
|
start_index = index
|
2191
|
-
if node_cache[:
|
2192
|
-
cached = node_cache[:
|
2191
|
+
if node_cache[:infragenus].has_key?(index)
|
2192
|
+
cached = node_cache[:infragenus][index]
|
2193
2193
|
@index = cached.interval.end if cached
|
2194
2194
|
return cached
|
2195
2195
|
end
|
@@ -2215,14 +2215,14 @@ module ScientificNameClean
|
|
2215
2215
|
end
|
2216
2216
|
if s0.last
|
2217
2217
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
2218
|
-
r0.extend(
|
2219
|
-
r0.extend(
|
2218
|
+
r0.extend(Infragenus0)
|
2219
|
+
r0.extend(Infragenus1)
|
2220
2220
|
else
|
2221
2221
|
@index = i0
|
2222
2222
|
r0 = nil
|
2223
2223
|
end
|
2224
2224
|
|
2225
|
-
node_cache[:
|
2225
|
+
node_cache[:infragenus][start_index] = r0
|
2226
2226
|
|
2227
2227
|
r0
|
2228
2228
|
end
|
@@ -2266,7 +2266,7 @@ module ScientificNameClean
|
|
2266
2266
|
end
|
2267
2267
|
|
2268
2268
|
def details
|
2269
|
-
{:genus => {:
|
2269
|
+
{:genus => {:string => a.value}}
|
2270
2270
|
end
|
2271
2271
|
end
|
2272
2272
|
|
@@ -2373,7 +2373,7 @@ module ScientificNameClean
|
|
2373
2373
|
|
2374
2374
|
i0 = index
|
2375
2375
|
i1, s1 = index, []
|
2376
|
-
r2 =
|
2376
|
+
r2 = _nt_uninomial_string
|
2377
2377
|
s1 << r2
|
2378
2378
|
if r2
|
2379
2379
|
r3 = _nt_space_hard
|
@@ -2394,7 +2394,7 @@ module ScientificNameClean
|
|
2394
2394
|
if r1
|
2395
2395
|
r0 = r1
|
2396
2396
|
else
|
2397
|
-
r5 =
|
2397
|
+
r5 = _nt_uninomial_string
|
2398
2398
|
if r5
|
2399
2399
|
r0 = r5
|
2400
2400
|
else
|
@@ -2408,7 +2408,7 @@ module ScientificNameClean
|
|
2408
2408
|
r0
|
2409
2409
|
end
|
2410
2410
|
|
2411
|
-
module
|
2411
|
+
module Uninomialstring0
|
2412
2412
|
def canonical
|
2413
2413
|
value
|
2414
2414
|
end
|
@@ -2422,22 +2422,22 @@ module ScientificNameClean
|
|
2422
2422
|
end
|
2423
2423
|
|
2424
2424
|
def details
|
2425
|
-
{:uninomial => {:
|
2425
|
+
{:uninomial => {:string => value}}
|
2426
2426
|
end
|
2427
2427
|
end
|
2428
2428
|
|
2429
|
-
def
|
2429
|
+
def _nt_uninomial_string
|
2430
2430
|
start_index = index
|
2431
|
-
if node_cache[:
|
2432
|
-
cached = node_cache[:
|
2431
|
+
if node_cache[:uninomial_string].has_key?(index)
|
2432
|
+
cached = node_cache[:uninomial_string][index]
|
2433
2433
|
@index = cached.interval.end if cached
|
2434
2434
|
return cached
|
2435
2435
|
end
|
2436
2436
|
|
2437
2437
|
r0 = _nt_cap_latin_word
|
2438
|
-
r0.extend(
|
2438
|
+
r0.extend(Uninomialstring0)
|
2439
2439
|
|
2440
|
-
node_cache[:
|
2440
|
+
node_cache[:uninomial_string][start_index] = r0
|
2441
2441
|
|
2442
2442
|
r0
|
2443
2443
|
end
|
@@ -4861,7 +4861,7 @@ module ScientificNameClean
|
|
4861
4861
|
end
|
4862
4862
|
|
4863
4863
|
def details
|
4864
|
-
{:species => {:
|
4864
|
+
{:species => {:string => b.value}}
|
4865
4865
|
end
|
4866
4866
|
end
|
4867
4867
|
|
@@ -4897,7 +4897,7 @@ module ScientificNameClean
|
|
4897
4897
|
end
|
4898
4898
|
|
4899
4899
|
def details
|
4900
|
-
{:species => {:
|
4900
|
+
{:species => {:string => b.value}}
|
4901
4901
|
end
|
4902
4902
|
end
|
4903
4903
|
|
@@ -4933,7 +4933,7 @@ module ScientificNameClean
|
|
4933
4933
|
end
|
4934
4934
|
|
4935
4935
|
def details
|
4936
|
-
{:species => {:
|
4936
|
+
{:species => {:string => b.value}}
|
4937
4937
|
end
|
4938
4938
|
end
|
4939
4939
|
|