dimus-biodiversity 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/nnparse +10 -6
- data/lib/biodiversity/parser/scientific_name.rb +590 -135
- data/lib/biodiversity/parser/scientific_name.treetop +92 -18
- data/spec/parser/scientific_name.spec.rb +52 -8
- metadata +2 -2
data/bin/nnparse
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'rubygems'
|
3
|
-
gem 'dimus-biodiversity' rescue gem 'biodiversity'
|
3
|
+
gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
|
4
4
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
6
6
|
require 'biodiversity'
|
@@ -37,11 +37,15 @@ IO.foreach(ARGV[0]) do |n|
|
|
37
37
|
puts n
|
38
38
|
count += 1
|
39
39
|
else
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
begin
|
41
|
+
name_dict[:output] = parsed.value
|
42
|
+
name_dict[:canononical] = parsed.canonical
|
43
|
+
name_dict[:details] = parsed.details
|
44
|
+
name_dict[:parsed => true]
|
45
|
+
last_result = JSON.generate name_dict
|
46
|
+
rescue
|
47
|
+
puts 'PROBLEM: ' + n
|
48
|
+
end
|
45
49
|
end
|
46
50
|
end
|
47
51
|
o.write(last_result + "\n") if last_result
|
@@ -741,6 +741,10 @@ module ScientificName
|
|
741
741
|
elements[1]
|
742
742
|
end
|
743
743
|
|
744
|
+
def ex_sep
|
745
|
+
elements[2]
|
746
|
+
end
|
747
|
+
|
744
748
|
def space
|
745
749
|
elements[3]
|
746
750
|
end
|
@@ -821,13 +825,7 @@ module ScientificName
|
|
821
825
|
r7 = _nt_space
|
822
826
|
s5 << r7
|
823
827
|
if r7
|
824
|
-
|
825
|
-
r8 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
826
|
-
@index += 2
|
827
|
-
else
|
828
|
-
terminal_parse_failure("ex")
|
829
|
-
r8 = nil
|
830
|
-
end
|
828
|
+
r8 = _nt_ex_sep
|
831
829
|
s5 << r8
|
832
830
|
if r8
|
833
831
|
r9 = _nt_space
|
@@ -984,9 +982,44 @@ module ScientificName
|
|
984
982
|
elements[3]
|
985
983
|
end
|
986
984
|
|
985
|
+
def space
|
986
|
+
elements[5]
|
987
|
+
end
|
988
|
+
|
989
|
+
def space
|
990
|
+
elements[7]
|
991
|
+
end
|
992
|
+
|
993
|
+
def b
|
994
|
+
elements[8]
|
995
|
+
end
|
987
996
|
end
|
988
997
|
|
989
998
|
module OriginalAuthorsNamesFull1
|
999
|
+
def value
|
1000
|
+
"(" + a.value + " " + b.value + ")"
|
1001
|
+
end
|
1002
|
+
def details
|
1003
|
+
{:orig_authors => a.details[:authors], :year => b.details[:year]}
|
1004
|
+
end
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
module OriginalAuthorsNamesFull2
|
1008
|
+
def space
|
1009
|
+
elements[1]
|
1010
|
+
end
|
1011
|
+
|
1012
|
+
def a
|
1013
|
+
elements[2]
|
1014
|
+
end
|
1015
|
+
|
1016
|
+
def space
|
1017
|
+
elements[3]
|
1018
|
+
end
|
1019
|
+
|
1020
|
+
end
|
1021
|
+
|
1022
|
+
module OriginalAuthorsNamesFull3
|
990
1023
|
def value
|
991
1024
|
"(" + a.value + ")"
|
992
1025
|
end
|
@@ -1003,44 +1036,120 @@ module ScientificName
|
|
1003
1036
|
return cached
|
1004
1037
|
end
|
1005
1038
|
|
1006
|
-
i0
|
1039
|
+
i0 = index
|
1040
|
+
i1, s1 = index, []
|
1007
1041
|
if input.index("(", index) == index
|
1008
|
-
|
1042
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1009
1043
|
@index += 1
|
1010
1044
|
else
|
1011
1045
|
terminal_parse_failure("(")
|
1012
|
-
|
1046
|
+
r2 = nil
|
1013
1047
|
end
|
1014
|
-
|
1015
|
-
if
|
1016
|
-
|
1017
|
-
|
1018
|
-
if
|
1019
|
-
|
1020
|
-
|
1021
|
-
if
|
1022
|
-
|
1023
|
-
|
1024
|
-
if
|
1048
|
+
s1 << r2
|
1049
|
+
if r2
|
1050
|
+
r3 = _nt_space
|
1051
|
+
s1 << r3
|
1052
|
+
if r3
|
1053
|
+
r4 = _nt_authors_names
|
1054
|
+
s1 << r4
|
1055
|
+
if r4
|
1056
|
+
r5 = _nt_space
|
1057
|
+
s1 << r5
|
1058
|
+
if r5
|
1025
1059
|
if input.index(")", index) == index
|
1026
|
-
|
1060
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1027
1061
|
@index += 1
|
1028
1062
|
else
|
1029
1063
|
terminal_parse_failure(")")
|
1030
|
-
|
1064
|
+
r6 = nil
|
1065
|
+
end
|
1066
|
+
s1 << r6
|
1067
|
+
if r6
|
1068
|
+
r7 = _nt_space
|
1069
|
+
s1 << r7
|
1070
|
+
if r7
|
1071
|
+
if input.index(Regexp.new('[,]'), index) == index
|
1072
|
+
r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1073
|
+
@index += 1
|
1074
|
+
else
|
1075
|
+
r9 = nil
|
1076
|
+
end
|
1077
|
+
if r9
|
1078
|
+
r8 = r9
|
1079
|
+
else
|
1080
|
+
r8 = instantiate_node(SyntaxNode,input, index...index)
|
1081
|
+
end
|
1082
|
+
s1 << r8
|
1083
|
+
if r8
|
1084
|
+
r10 = _nt_space
|
1085
|
+
s1 << r10
|
1086
|
+
if r10
|
1087
|
+
r11 = _nt_year
|
1088
|
+
s1 << r11
|
1089
|
+
end
|
1090
|
+
end
|
1091
|
+
end
|
1031
1092
|
end
|
1032
|
-
s0 << r5
|
1033
1093
|
end
|
1034
1094
|
end
|
1035
1095
|
end
|
1036
1096
|
end
|
1037
|
-
if
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1097
|
+
if s1.last
|
1098
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
1099
|
+
r1.extend(OriginalAuthorsNamesFull0)
|
1100
|
+
r1.extend(OriginalAuthorsNamesFull1)
|
1041
1101
|
else
|
1042
|
-
self.index =
|
1043
|
-
|
1102
|
+
self.index = i1
|
1103
|
+
r1 = nil
|
1104
|
+
end
|
1105
|
+
if r1
|
1106
|
+
r0 = r1
|
1107
|
+
else
|
1108
|
+
i12, s12 = index, []
|
1109
|
+
if input.index("(", index) == index
|
1110
|
+
r13 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1111
|
+
@index += 1
|
1112
|
+
else
|
1113
|
+
terminal_parse_failure("(")
|
1114
|
+
r13 = nil
|
1115
|
+
end
|
1116
|
+
s12 << r13
|
1117
|
+
if r13
|
1118
|
+
r14 = _nt_space
|
1119
|
+
s12 << r14
|
1120
|
+
if r14
|
1121
|
+
r15 = _nt_authors_names_full
|
1122
|
+
s12 << r15
|
1123
|
+
if r15
|
1124
|
+
r16 = _nt_space
|
1125
|
+
s12 << r16
|
1126
|
+
if r16
|
1127
|
+
if input.index(")", index) == index
|
1128
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1129
|
+
@index += 1
|
1130
|
+
else
|
1131
|
+
terminal_parse_failure(")")
|
1132
|
+
r17 = nil
|
1133
|
+
end
|
1134
|
+
s12 << r17
|
1135
|
+
end
|
1136
|
+
end
|
1137
|
+
end
|
1138
|
+
end
|
1139
|
+
if s12.last
|
1140
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
1141
|
+
r12.extend(OriginalAuthorsNamesFull2)
|
1142
|
+
r12.extend(OriginalAuthorsNamesFull3)
|
1143
|
+
else
|
1144
|
+
self.index = i12
|
1145
|
+
r12 = nil
|
1146
|
+
end
|
1147
|
+
if r12
|
1148
|
+
r0 = r12
|
1149
|
+
else
|
1150
|
+
self.index = i0
|
1151
|
+
r0 = nil
|
1152
|
+
end
|
1044
1153
|
end
|
1045
1154
|
|
1046
1155
|
node_cache[:original_authors_names_full][start_index] = r0
|
@@ -1135,6 +1244,10 @@ module ScientificName
|
|
1135
1244
|
elements[1]
|
1136
1245
|
end
|
1137
1246
|
|
1247
|
+
def ex_sep
|
1248
|
+
elements[2]
|
1249
|
+
end
|
1250
|
+
|
1138
1251
|
def space
|
1139
1252
|
elements[3]
|
1140
1253
|
end
|
@@ -1168,13 +1281,7 @@ module ScientificName
|
|
1168
1281
|
r2 = _nt_space
|
1169
1282
|
s0 << r2
|
1170
1283
|
if r2
|
1171
|
-
|
1172
|
-
r3 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
1173
|
-
@index += 2
|
1174
|
-
else
|
1175
|
-
terminal_parse_failure("ex")
|
1176
|
-
r3 = nil
|
1177
|
-
end
|
1284
|
+
r3 = _nt_ex_sep
|
1178
1285
|
s0 << r3
|
1179
1286
|
if r3
|
1180
1287
|
r4 = _nt_space
|
@@ -1290,6 +1397,45 @@ module ScientificName
|
|
1290
1397
|
return r0
|
1291
1398
|
end
|
1292
1399
|
|
1400
|
+
def _nt_ex_sep
|
1401
|
+
start_index = index
|
1402
|
+
if node_cache[:ex_sep].has_key?(index)
|
1403
|
+
cached = node_cache[:ex_sep][index]
|
1404
|
+
@index = cached.interval.end if cached
|
1405
|
+
return cached
|
1406
|
+
end
|
1407
|
+
|
1408
|
+
i0 = index
|
1409
|
+
if input.index("ex", index) == index
|
1410
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
1411
|
+
@index += 2
|
1412
|
+
else
|
1413
|
+
terminal_parse_failure("ex")
|
1414
|
+
r1 = nil
|
1415
|
+
end
|
1416
|
+
if r1
|
1417
|
+
r0 = r1
|
1418
|
+
else
|
1419
|
+
if input.index("in", index) == index
|
1420
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
1421
|
+
@index += 2
|
1422
|
+
else
|
1423
|
+
terminal_parse_failure("in")
|
1424
|
+
r2 = nil
|
1425
|
+
end
|
1426
|
+
if r2
|
1427
|
+
r0 = r2
|
1428
|
+
else
|
1429
|
+
self.index = i0
|
1430
|
+
r0 = nil
|
1431
|
+
end
|
1432
|
+
end
|
1433
|
+
|
1434
|
+
node_cache[:ex_sep][start_index] = r0
|
1435
|
+
|
1436
|
+
return r0
|
1437
|
+
end
|
1438
|
+
|
1293
1439
|
module AuthorsNames0
|
1294
1440
|
def a
|
1295
1441
|
elements[0]
|
@@ -1378,7 +1524,7 @@ module ScientificName
|
|
1378
1524
|
module AuthorNameSeparator0
|
1379
1525
|
def apply(a,b)
|
1380
1526
|
sep = text_value.strip
|
1381
|
-
sep = " "
|
1527
|
+
sep = " et" if ["&","and","et"].include? sep
|
1382
1528
|
a.value + sep + " " + b.value
|
1383
1529
|
end
|
1384
1530
|
|
@@ -1418,8 +1564,32 @@ module ScientificName
|
|
1418
1564
|
r0 = r2
|
1419
1565
|
r0.extend(AuthorNameSeparator0)
|
1420
1566
|
else
|
1421
|
-
|
1422
|
-
|
1567
|
+
if input.index("and", index) == index
|
1568
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
1569
|
+
@index += 3
|
1570
|
+
else
|
1571
|
+
terminal_parse_failure("and")
|
1572
|
+
r3 = nil
|
1573
|
+
end
|
1574
|
+
if r3
|
1575
|
+
r0 = r3
|
1576
|
+
r0.extend(AuthorNameSeparator0)
|
1577
|
+
else
|
1578
|
+
if input.index("et", index) == index
|
1579
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
1580
|
+
@index += 2
|
1581
|
+
else
|
1582
|
+
terminal_parse_failure("et")
|
1583
|
+
r4 = nil
|
1584
|
+
end
|
1585
|
+
if r4
|
1586
|
+
r0 = r4
|
1587
|
+
r0.extend(AuthorNameSeparator0)
|
1588
|
+
else
|
1589
|
+
self.index = i0
|
1590
|
+
r0 = nil
|
1591
|
+
end
|
1592
|
+
end
|
1423
1593
|
end
|
1424
1594
|
end
|
1425
1595
|
|
@@ -2104,17 +2274,17 @@ module ScientificName
|
|
2104
2274
|
s13 << r17
|
2105
2275
|
if r17
|
2106
2276
|
i18 = index
|
2107
|
-
if input.index(Regexp.new('[
|
2277
|
+
if input.index(Regexp.new('[\\.]'), index) == index
|
2108
2278
|
r19 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
2109
2279
|
@index += 1
|
2110
2280
|
else
|
2111
2281
|
r19 = nil
|
2112
2282
|
end
|
2113
2283
|
if r19
|
2284
|
+
r18 = nil
|
2285
|
+
else
|
2114
2286
|
self.index = i18
|
2115
2287
|
r18 = instantiate_node(SyntaxNode,input, index...index)
|
2116
|
-
else
|
2117
|
-
r18 = nil
|
2118
2288
|
end
|
2119
2289
|
s13 << r18
|
2120
2290
|
end
|
@@ -2392,6 +2562,9 @@ module ScientificName
|
|
2392
2562
|
def value
|
2393
2563
|
a.value + b.value
|
2394
2564
|
end
|
2565
|
+
def details
|
2566
|
+
{:editorial_markup => value, :is_valid => false}
|
2567
|
+
end
|
2395
2568
|
end
|
2396
2569
|
|
2397
2570
|
def _nt_editorials
|
@@ -3277,25 +3450,40 @@ module ScientificName
|
|
3277
3450
|
return r0
|
3278
3451
|
end
|
3279
3452
|
|
3280
|
-
module
|
3453
|
+
module CapLatinWord0
|
3454
|
+
def a
|
3455
|
+
elements[0]
|
3456
|
+
end
|
3457
|
+
|
3458
|
+
def b
|
3459
|
+
elements[1]
|
3460
|
+
end
|
3281
3461
|
end
|
3282
3462
|
|
3283
|
-
module
|
3284
|
-
def value
|
3285
|
-
text_value.
|
3463
|
+
module CapLatinWord1
|
3464
|
+
def value
|
3465
|
+
a.text_value + b.value
|
3466
|
+
end
|
3467
|
+
|
3468
|
+
def canonical
|
3469
|
+
value
|
3470
|
+
end
|
3471
|
+
|
3472
|
+
def details
|
3473
|
+
{:uninomial => value}
|
3286
3474
|
end
|
3287
3475
|
end
|
3288
3476
|
|
3289
|
-
def
|
3477
|
+
def _nt_cap_latin_word
|
3290
3478
|
start_index = index
|
3291
|
-
if node_cache[:
|
3292
|
-
cached = node_cache[:
|
3479
|
+
if node_cache[:cap_latin_word].has_key?(index)
|
3480
|
+
cached = node_cache[:cap_latin_word][index]
|
3293
3481
|
@index = cached.interval.end if cached
|
3294
3482
|
return cached
|
3295
3483
|
end
|
3296
3484
|
|
3297
3485
|
i0, s0 = index, []
|
3298
|
-
if input.index(Regexp.new('[
|
3486
|
+
if input.index(Regexp.new('[A-Z]'), index) == index
|
3299
3487
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3300
3488
|
@index += 1
|
3301
3489
|
else
|
@@ -3303,117 +3491,303 @@ module ScientificName
|
|
3303
3491
|
end
|
3304
3492
|
s0 << r1
|
3305
3493
|
if r1
|
3306
|
-
|
3307
|
-
loop do
|
3308
|
-
if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
|
3309
|
-
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3310
|
-
@index += 1
|
3311
|
-
else
|
3312
|
-
r3 = nil
|
3313
|
-
end
|
3314
|
-
if r3
|
3315
|
-
s2 << r3
|
3316
|
-
else
|
3317
|
-
break
|
3318
|
-
end
|
3319
|
-
end
|
3320
|
-
if s2.empty?
|
3321
|
-
self.index = i2
|
3322
|
-
r2 = nil
|
3323
|
-
else
|
3324
|
-
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
3325
|
-
end
|
3494
|
+
r2 = _nt_latin_word
|
3326
3495
|
s0 << r2
|
3327
3496
|
end
|
3328
3497
|
if s0.last
|
3329
3498
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
3330
|
-
r0.extend(
|
3331
|
-
r0.extend(
|
3499
|
+
r0.extend(CapLatinWord0)
|
3500
|
+
r0.extend(CapLatinWord1)
|
3332
3501
|
else
|
3333
3502
|
self.index = i0
|
3334
3503
|
r0 = nil
|
3335
3504
|
end
|
3336
3505
|
|
3337
|
-
node_cache[:
|
3506
|
+
node_cache[:cap_latin_word][start_index] = r0
|
3338
3507
|
|
3339
3508
|
return r0
|
3340
3509
|
end
|
3341
3510
|
|
3342
|
-
module
|
3511
|
+
module LatinWord0
|
3512
|
+
def a
|
3513
|
+
elements[0]
|
3514
|
+
end
|
3515
|
+
|
3516
|
+
def b
|
3517
|
+
elements[1]
|
3518
|
+
end
|
3343
3519
|
end
|
3344
3520
|
|
3345
|
-
module
|
3521
|
+
module LatinWord1
|
3346
3522
|
def value
|
3347
|
-
text_value.
|
3348
|
-
end
|
3349
|
-
|
3350
|
-
def canonical
|
3351
|
-
text_value.strip
|
3352
|
-
end
|
3353
|
-
|
3354
|
-
def details
|
3355
|
-
{:uninomial => value}
|
3523
|
+
a.text_value + b.value
|
3356
3524
|
end
|
3357
3525
|
end
|
3358
3526
|
|
3359
|
-
|
3360
|
-
|
3361
|
-
|
3362
|
-
cached = node_cache[:cap_latin_word][index]
|
3363
|
-
@index = cached.interval.end if cached
|
3364
|
-
return cached
|
3527
|
+
module LatinWord2
|
3528
|
+
def a
|
3529
|
+
elements[0]
|
3365
3530
|
end
|
3366
3531
|
|
3367
|
-
|
3368
|
-
|
3369
|
-
|
3532
|
+
def b
|
3533
|
+
elements[1]
|
3534
|
+
end
|
3535
|
+
end
|
3536
|
+
|
3537
|
+
module LatinWord3
|
3538
|
+
def value
|
3539
|
+
a.value + b.value
|
3540
|
+
end
|
3541
|
+
end
|
3542
|
+
|
3543
|
+
def _nt_latin_word
|
3544
|
+
start_index = index
|
3545
|
+
if node_cache[:latin_word].has_key?(index)
|
3546
|
+
cached = node_cache[:latin_word][index]
|
3547
|
+
@index = cached.interval.end if cached
|
3548
|
+
return cached
|
3549
|
+
end
|
3550
|
+
|
3551
|
+
i0 = index
|
3552
|
+
i1, s1 = index, []
|
3553
|
+
if input.index(Regexp.new('[a-zë]'), index) == index
|
3554
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3370
3555
|
@index += 1
|
3371
3556
|
else
|
3557
|
+
r2 = nil
|
3558
|
+
end
|
3559
|
+
s1 << r2
|
3560
|
+
if r2
|
3561
|
+
r3 = _nt_full_name_letters
|
3562
|
+
s1 << r3
|
3563
|
+
end
|
3564
|
+
if s1.last
|
3565
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
3566
|
+
r1.extend(LatinWord0)
|
3567
|
+
r1.extend(LatinWord1)
|
3568
|
+
else
|
3569
|
+
self.index = i1
|
3372
3570
|
r1 = nil
|
3373
3571
|
end
|
3374
|
-
s0 << r1
|
3375
3572
|
if r1
|
3376
|
-
|
3377
|
-
|
3378
|
-
|
3573
|
+
r0 = r1
|
3574
|
+
else
|
3575
|
+
i4, s4 = index, []
|
3576
|
+
r5 = _nt_digraph
|
3577
|
+
s4 << r5
|
3578
|
+
if r5
|
3579
|
+
r6 = _nt_full_name_letters
|
3580
|
+
s4 << r6
|
3581
|
+
end
|
3582
|
+
if s4.last
|
3583
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
3584
|
+
r4.extend(LatinWord2)
|
3585
|
+
r4.extend(LatinWord3)
|
3379
3586
|
else
|
3380
|
-
|
3587
|
+
self.index = i4
|
3588
|
+
r4 = nil
|
3381
3589
|
end
|
3382
|
-
|
3383
|
-
|
3384
|
-
|
3385
|
-
|
3386
|
-
|
3387
|
-
|
3388
|
-
|
3389
|
-
|
3390
|
-
|
3391
|
-
|
3392
|
-
|
3393
|
-
|
3394
|
-
|
3395
|
-
|
3396
|
-
|
3590
|
+
if r4
|
3591
|
+
r0 = r4
|
3592
|
+
else
|
3593
|
+
self.index = i0
|
3594
|
+
r0 = nil
|
3595
|
+
end
|
3596
|
+
end
|
3597
|
+
|
3598
|
+
node_cache[:latin_word][start_index] = r0
|
3599
|
+
|
3600
|
+
return r0
|
3601
|
+
end
|
3602
|
+
|
3603
|
+
module FullNameLetters0
|
3604
|
+
def a
|
3605
|
+
elements[0]
|
3606
|
+
end
|
3607
|
+
|
3608
|
+
def b
|
3609
|
+
elements[1]
|
3610
|
+
end
|
3611
|
+
end
|
3612
|
+
|
3613
|
+
module FullNameLetters1
|
3614
|
+
def value
|
3615
|
+
a.value + b.value
|
3616
|
+
end
|
3617
|
+
end
|
3618
|
+
|
3619
|
+
module FullNameLetters2
|
3620
|
+
def a
|
3621
|
+
elements[0]
|
3622
|
+
end
|
3623
|
+
|
3624
|
+
def b
|
3625
|
+
elements[1]
|
3626
|
+
end
|
3627
|
+
|
3628
|
+
def c
|
3629
|
+
elements[2]
|
3630
|
+
end
|
3631
|
+
end
|
3632
|
+
|
3633
|
+
module FullNameLetters3
|
3634
|
+
def value
|
3635
|
+
a.value + b.value + c.value
|
3636
|
+
end
|
3637
|
+
end
|
3638
|
+
|
3639
|
+
def _nt_full_name_letters
|
3640
|
+
start_index = index
|
3641
|
+
if node_cache[:full_name_letters].has_key?(index)
|
3642
|
+
cached = node_cache[:full_name_letters][index]
|
3643
|
+
@index = cached.interval.end if cached
|
3644
|
+
return cached
|
3645
|
+
end
|
3646
|
+
|
3647
|
+
i0 = index
|
3648
|
+
i1, s1 = index, []
|
3649
|
+
r2 = _nt_digraph
|
3650
|
+
s1 << r2
|
3651
|
+
if r2
|
3652
|
+
r3 = _nt_full_name_letters
|
3653
|
+
s1 << r3
|
3654
|
+
end
|
3655
|
+
if s1.last
|
3656
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
3657
|
+
r1.extend(FullNameLetters0)
|
3658
|
+
r1.extend(FullNameLetters1)
|
3659
|
+
else
|
3660
|
+
self.index = i1
|
3661
|
+
r1 = nil
|
3662
|
+
end
|
3663
|
+
if r1
|
3664
|
+
r0 = r1
|
3665
|
+
else
|
3666
|
+
i4, s4 = index, []
|
3667
|
+
r5 = _nt_valid_name_letters
|
3668
|
+
s4 << r5
|
3669
|
+
if r5
|
3670
|
+
r6 = _nt_digraph
|
3671
|
+
s4 << r6
|
3672
|
+
if r6
|
3673
|
+
r7 = _nt_full_name_letters
|
3674
|
+
s4 << r7
|
3397
3675
|
end
|
3398
|
-
|
3399
|
-
|
3400
|
-
|
3676
|
+
end
|
3677
|
+
if s4.last
|
3678
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
3679
|
+
r4.extend(FullNameLetters2)
|
3680
|
+
r4.extend(FullNameLetters3)
|
3681
|
+
else
|
3682
|
+
self.index = i4
|
3683
|
+
r4 = nil
|
3684
|
+
end
|
3685
|
+
if r4
|
3686
|
+
r0 = r4
|
3687
|
+
else
|
3688
|
+
r8 = _nt_valid_name_letters
|
3689
|
+
if r8
|
3690
|
+
r0 = r8
|
3401
3691
|
else
|
3402
|
-
|
3692
|
+
self.index = i0
|
3693
|
+
r0 = nil
|
3403
3694
|
end
|
3404
|
-
s0 << r3
|
3405
3695
|
end
|
3406
3696
|
end
|
3407
|
-
|
3408
|
-
|
3409
|
-
|
3410
|
-
|
3411
|
-
|
3697
|
+
|
3698
|
+
node_cache[:full_name_letters][start_index] = r0
|
3699
|
+
|
3700
|
+
return r0
|
3701
|
+
end
|
3702
|
+
|
3703
|
+
module ValidNameLetters0
|
3704
|
+
def value
|
3705
|
+
text_value
|
3706
|
+
end
|
3707
|
+
end
|
3708
|
+
|
3709
|
+
def _nt_valid_name_letters
|
3710
|
+
start_index = index
|
3711
|
+
if node_cache[:valid_name_letters].has_key?(index)
|
3712
|
+
cached = node_cache[:valid_name_letters][index]
|
3713
|
+
@index = cached.interval.end if cached
|
3714
|
+
return cached
|
3715
|
+
end
|
3716
|
+
|
3717
|
+
s0, i0 = [], index
|
3718
|
+
loop do
|
3719
|
+
if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
|
3720
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3721
|
+
@index += 1
|
3722
|
+
else
|
3723
|
+
r1 = nil
|
3724
|
+
end
|
3725
|
+
if r1
|
3726
|
+
s0 << r1
|
3727
|
+
else
|
3728
|
+
break
|
3729
|
+
end
|
3730
|
+
end
|
3731
|
+
if s0.empty?
|
3412
3732
|
self.index = i0
|
3413
3733
|
r0 = nil
|
3734
|
+
else
|
3735
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
3736
|
+
r0.extend(ValidNameLetters0)
|
3414
3737
|
end
|
3415
3738
|
|
3416
|
-
node_cache[:
|
3739
|
+
node_cache[:valid_name_letters][start_index] = r0
|
3740
|
+
|
3741
|
+
return r0
|
3742
|
+
end
|
3743
|
+
|
3744
|
+
module Digraph0
|
3745
|
+
def value
|
3746
|
+
'ae'
|
3747
|
+
end
|
3748
|
+
end
|
3749
|
+
|
3750
|
+
module Digraph1
|
3751
|
+
def value
|
3752
|
+
'oe'
|
3753
|
+
end
|
3754
|
+
end
|
3755
|
+
|
3756
|
+
def _nt_digraph
|
3757
|
+
start_index = index
|
3758
|
+
if node_cache[:digraph].has_key?(index)
|
3759
|
+
cached = node_cache[:digraph][index]
|
3760
|
+
@index = cached.interval.end if cached
|
3761
|
+
return cached
|
3762
|
+
end
|
3763
|
+
|
3764
|
+
i0 = index
|
3765
|
+
if input.index(Regexp.new('[æ]'), index) == index
|
3766
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3767
|
+
r1.extend(Digraph0)
|
3768
|
+
@index += 1
|
3769
|
+
else
|
3770
|
+
r1 = nil
|
3771
|
+
end
|
3772
|
+
if r1
|
3773
|
+
r0 = r1
|
3774
|
+
else
|
3775
|
+
if input.index(Regexp.new('[œ]'), index) == index
|
3776
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3777
|
+
r2.extend(Digraph1)
|
3778
|
+
@index += 1
|
3779
|
+
else
|
3780
|
+
r2 = nil
|
3781
|
+
end
|
3782
|
+
if r2
|
3783
|
+
r0 = r2
|
3784
|
+
else
|
3785
|
+
self.index = i0
|
3786
|
+
r0 = nil
|
3787
|
+
end
|
3788
|
+
end
|
3789
|
+
|
3790
|
+
node_cache[:digraph][start_index] = r0
|
3417
3791
|
|
3418
3792
|
return r0
|
3419
3793
|
end
|
@@ -3494,29 +3868,110 @@ module ScientificName
|
|
3494
3868
|
return cached
|
3495
3869
|
end
|
3496
3870
|
|
3497
|
-
|
3871
|
+
i0 = index
|
3872
|
+
r1 = _nt_year_with_character
|
3873
|
+
if r1
|
3874
|
+
r0 = r1
|
3875
|
+
else
|
3876
|
+
s2, i2 = [], index
|
3877
|
+
loop do
|
3878
|
+
if input.index(Regexp.new('[0-9\\?]'), index) == index
|
3879
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3880
|
+
@index += 1
|
3881
|
+
else
|
3882
|
+
r3 = nil
|
3883
|
+
end
|
3884
|
+
if r3
|
3885
|
+
s2 << r3
|
3886
|
+
else
|
3887
|
+
break
|
3888
|
+
end
|
3889
|
+
end
|
3890
|
+
if s2.empty?
|
3891
|
+
self.index = i2
|
3892
|
+
r2 = nil
|
3893
|
+
else
|
3894
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
3895
|
+
r2.extend(Year0)
|
3896
|
+
end
|
3897
|
+
if r2
|
3898
|
+
r0 = r2
|
3899
|
+
else
|
3900
|
+
self.index = i0
|
3901
|
+
r0 = nil
|
3902
|
+
end
|
3903
|
+
end
|
3904
|
+
|
3905
|
+
node_cache[:year][start_index] = r0
|
3906
|
+
|
3907
|
+
return r0
|
3908
|
+
end
|
3909
|
+
|
3910
|
+
module YearWithCharacter0
|
3911
|
+
def a
|
3912
|
+
elements[0]
|
3913
|
+
end
|
3914
|
+
|
3915
|
+
end
|
3916
|
+
|
3917
|
+
module YearWithCharacter1
|
3918
|
+
def value
|
3919
|
+
a.text_value
|
3920
|
+
end
|
3921
|
+
def details
|
3922
|
+
{:year => value}
|
3923
|
+
end
|
3924
|
+
end
|
3925
|
+
|
3926
|
+
def _nt_year_with_character
|
3927
|
+
start_index = index
|
3928
|
+
if node_cache[:year_with_character].has_key?(index)
|
3929
|
+
cached = node_cache[:year_with_character][index]
|
3930
|
+
@index = cached.interval.end if cached
|
3931
|
+
return cached
|
3932
|
+
end
|
3933
|
+
|
3934
|
+
i0, s0 = index, []
|
3935
|
+
s1, i1 = [], index
|
3498
3936
|
loop do
|
3499
3937
|
if input.index(Regexp.new('[0-9\\?]'), index) == index
|
3500
|
-
|
3938
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3501
3939
|
@index += 1
|
3502
3940
|
else
|
3503
|
-
|
3941
|
+
r2 = nil
|
3504
3942
|
end
|
3505
|
-
if
|
3506
|
-
|
3943
|
+
if r2
|
3944
|
+
s1 << r2
|
3507
3945
|
else
|
3508
3946
|
break
|
3509
3947
|
end
|
3510
3948
|
end
|
3511
|
-
if
|
3512
|
-
self.index =
|
3513
|
-
|
3949
|
+
if s1.empty?
|
3950
|
+
self.index = i1
|
3951
|
+
r1 = nil
|
3514
3952
|
else
|
3953
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
3954
|
+
end
|
3955
|
+
s0 << r1
|
3956
|
+
if r1
|
3957
|
+
if input.index(Regexp.new('[a-zA-Z]'), index) == index
|
3958
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
3959
|
+
@index += 1
|
3960
|
+
else
|
3961
|
+
r3 = nil
|
3962
|
+
end
|
3963
|
+
s0 << r3
|
3964
|
+
end
|
3965
|
+
if s0.last
|
3515
3966
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
3516
|
-
r0.extend(
|
3967
|
+
r0.extend(YearWithCharacter0)
|
3968
|
+
r0.extend(YearWithCharacter1)
|
3969
|
+
else
|
3970
|
+
self.index = i0
|
3971
|
+
r0 = nil
|
3517
3972
|
end
|
3518
3973
|
|
3519
|
-
node_cache[:
|
3974
|
+
node_cache[:year_with_character][start_index] = r0
|
3520
3975
|
|
3521
3976
|
return r0
|
3522
3977
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
grammar ScientificName
|
2
3
|
|
3
4
|
rule composite_scientific_name
|
@@ -27,7 +28,7 @@ grammar ScientificName
|
|
27
28
|
end
|
28
29
|
}
|
29
30
|
/
|
30
|
-
scientific_name
|
31
|
+
scientific_name
|
31
32
|
end
|
32
33
|
|
33
34
|
rule scientific_name
|
@@ -137,7 +138,7 @@ grammar ScientificName
|
|
137
138
|
end
|
138
139
|
}
|
139
140
|
/
|
140
|
-
a:simple_authors_part space
|
141
|
+
a:simple_authors_part space ex_sep space b:simple_authors_part {
|
141
142
|
def value
|
142
143
|
a.value + " ex " + b.value
|
143
144
|
end
|
@@ -179,6 +180,15 @@ grammar ScientificName
|
|
179
180
|
end
|
180
181
|
|
181
182
|
rule original_authors_names_full
|
183
|
+
"(" space a:authors_names space ")" space [,]? space b:year {
|
184
|
+
def value
|
185
|
+
"(" + a.value + " " + b.value + ")"
|
186
|
+
end
|
187
|
+
def details
|
188
|
+
{:orig_authors => a.details[:authors], :year => b.details[:year]}
|
189
|
+
end
|
190
|
+
}
|
191
|
+
/
|
182
192
|
"(" space a:authors_names_full space ")" {
|
183
193
|
def value
|
184
194
|
"(" + a.value + ")"
|
@@ -202,7 +212,7 @@ grammar ScientificName
|
|
202
212
|
end
|
203
213
|
|
204
214
|
rule authors_revised_name
|
205
|
-
a:authors_names_full space
|
215
|
+
a:authors_names_full space ex_sep space b:authors_names_full {
|
206
216
|
def value
|
207
217
|
a.value + " ex " + b.value
|
208
218
|
end
|
@@ -224,6 +234,10 @@ grammar ScientificName
|
|
224
234
|
/
|
225
235
|
authors_names
|
226
236
|
end
|
237
|
+
|
238
|
+
rule ex_sep
|
239
|
+
("ex"/"in")
|
240
|
+
end
|
227
241
|
|
228
242
|
rule authors_names
|
229
243
|
a:author_name space sep:author_name_separator space b:authors_names {
|
@@ -240,10 +254,10 @@ grammar ScientificName
|
|
240
254
|
end
|
241
255
|
|
242
256
|
rule author_name_separator
|
243
|
-
("&"/",") {
|
257
|
+
("&"/","/"and"/"et") {
|
244
258
|
def apply(a,b)
|
245
259
|
sep = text_value.strip
|
246
|
-
sep = " "
|
260
|
+
sep = " et" if ["&","and","et"].include? sep
|
247
261
|
a.value + sep + " " + b.value
|
248
262
|
end
|
249
263
|
|
@@ -315,7 +329,7 @@ grammar ScientificName
|
|
315
329
|
end
|
316
330
|
}
|
317
331
|
/
|
318
|
-
space a:species_name space b:latin_word
|
332
|
+
space a:species_name space b:latin_word ![\.] {
|
319
333
|
def value
|
320
334
|
a.value + " " + b.value
|
321
335
|
end
|
@@ -385,6 +399,9 @@ grammar ScientificName
|
|
385
399
|
space a:rank space [&]? space b:editorials {
|
386
400
|
def value
|
387
401
|
a.value + b.value
|
402
|
+
end
|
403
|
+
def details
|
404
|
+
{:editorial_markup => value, :is_valid => false}
|
388
405
|
end
|
389
406
|
}
|
390
407
|
/
|
@@ -483,23 +500,15 @@ grammar ScientificName
|
|
483
500
|
end
|
484
501
|
}
|
485
502
|
end
|
486
|
-
|
487
|
-
rule latin_word
|
488
|
-
[a-zë] [a-z\-ëüäöï]+ {
|
489
|
-
def value
|
490
|
-
text_value.strip
|
491
|
-
end
|
492
|
-
}
|
493
|
-
end
|
494
503
|
|
495
504
|
rule cap_latin_word
|
496
|
-
[A-Z]
|
505
|
+
a:[A-Z] b:latin_word {
|
497
506
|
def value
|
498
|
-
text_value.
|
507
|
+
a.text_value + b.value
|
499
508
|
end
|
500
509
|
|
501
510
|
def canonical
|
502
|
-
|
511
|
+
value
|
503
512
|
end
|
504
513
|
|
505
514
|
def details
|
@@ -507,7 +516,59 @@ grammar ScientificName
|
|
507
516
|
end
|
508
517
|
}
|
509
518
|
end
|
519
|
+
|
520
|
+
rule latin_word
|
521
|
+
a:[a-zë] b:full_name_letters {
|
522
|
+
def value
|
523
|
+
a.text_value + b.value
|
524
|
+
end
|
525
|
+
}
|
526
|
+
/
|
527
|
+
a:digraph b:full_name_letters {
|
528
|
+
def value
|
529
|
+
a.value + b.value
|
530
|
+
end
|
531
|
+
}
|
532
|
+
end
|
510
533
|
|
534
|
+
rule full_name_letters
|
535
|
+
a:digraph b:full_name_letters {
|
536
|
+
def value
|
537
|
+
a.value + b.value
|
538
|
+
end
|
539
|
+
}
|
540
|
+
/
|
541
|
+
a:valid_name_letters b:digraph c:full_name_letters {
|
542
|
+
def value
|
543
|
+
a.value + b.value + c.value
|
544
|
+
end
|
545
|
+
}
|
546
|
+
/
|
547
|
+
valid_name_letters
|
548
|
+
end
|
549
|
+
|
550
|
+
rule valid_name_letters
|
551
|
+
[a-z\-ëüäöï]+ {
|
552
|
+
def value
|
553
|
+
text_value
|
554
|
+
end
|
555
|
+
}
|
556
|
+
end
|
557
|
+
|
558
|
+
rule digraph
|
559
|
+
[æ] {
|
560
|
+
def value
|
561
|
+
'ae'
|
562
|
+
end
|
563
|
+
}
|
564
|
+
/
|
565
|
+
[œ] {
|
566
|
+
def value
|
567
|
+
'oe'
|
568
|
+
end
|
569
|
+
}
|
570
|
+
end
|
571
|
+
|
511
572
|
rule hybrid_separator
|
512
573
|
("x"/"X"/"×") {
|
513
574
|
def value
|
@@ -517,6 +578,8 @@ grammar ScientificName
|
|
517
578
|
end
|
518
579
|
|
519
580
|
rule year
|
581
|
+
year_with_character
|
582
|
+
/
|
520
583
|
[0-9\?]+ {
|
521
584
|
def value
|
522
585
|
text_value.strip
|
@@ -526,7 +589,18 @@ grammar ScientificName
|
|
526
589
|
end
|
527
590
|
}
|
528
591
|
end
|
529
|
-
|
592
|
+
|
593
|
+
rule year_with_character
|
594
|
+
a:[0-9\?]+ [a-zA-Z] {
|
595
|
+
def value
|
596
|
+
a.text_value
|
597
|
+
end
|
598
|
+
def details
|
599
|
+
{:year => value}
|
600
|
+
end
|
601
|
+
}
|
602
|
+
end
|
603
|
+
|
530
604
|
rule space
|
531
605
|
[\s]*
|
532
606
|
end
|
@@ -51,6 +51,31 @@ describe ScientificName do
|
|
51
51
|
details(sn).should == {:subgenus=>"Amerigo", :authors=>{:year=>"1999", :names=>["Author"]}, :species=>"pealeii", :genus=>"Doriteuthis"}
|
52
52
|
end
|
53
53
|
|
54
|
+
it 'should parse æ in the name' do
|
55
|
+
names = [
|
56
|
+
["Læptura laetifica Dow, 1913", "Laeptura laetifica Dow 1913"],
|
57
|
+
["Leptura lætifica Dow, 1913", "Leptura laetifica Dow 1913"],
|
58
|
+
["Leptura leætifica Dow, 1913", "Leptura leaetifica Dow 1913"],
|
59
|
+
["Leæptura laetifica Dow, 1913", "Leaeptura laetifica Dow 1913"],
|
60
|
+
["Leœptura laetifica Dow, 1913", "Leoeptura laetifica Dow 1913"]
|
61
|
+
]
|
62
|
+
names.each do |name_pair|
|
63
|
+
parse(name_pair[0]).should_not be_nil
|
64
|
+
value(name_pair[0]).should == name_pair[1]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should parse year' do
|
69
|
+
sn = "Platypus bicaudatulus Schedl 1935"
|
70
|
+
parse(sn).should_not be_nil
|
71
|
+
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
72
|
+
sn = "Platypus bicaudatulus Schedl, 1935h"
|
73
|
+
parse(sn).should_not be_nil
|
74
|
+
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
75
|
+
details(sn).should == {:genus=>"Platypus", :species=>"bicaudatulus", :authors=>{:names=>["Schedl"], :year=>"1935"}}
|
76
|
+
parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
|
77
|
+
end
|
78
|
+
|
54
79
|
it 'should parse species autonym for complex subspecies authorships' do
|
55
80
|
parse("Aus bus Linn. var. bus").should_not be_nil
|
56
81
|
details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
|
@@ -61,18 +86,24 @@ describe ScientificName do
|
|
61
86
|
it 'should parse several authors' do
|
62
87
|
sn = "Pseudocercospora dendrobii U. Braun & Crous"
|
63
88
|
parse(sn).should_not be_nil
|
64
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun
|
89
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
65
90
|
canonical(sn).should == "Pseudocercospora dendrobii"
|
66
91
|
details(sn).should == {
|
67
92
|
:authors=>{:names=>["U. Braun","Crous"]},
|
68
93
|
:species=>"dendrobii",
|
69
94
|
:genus=>"Pseudocercospora"}
|
95
|
+
sn = "Pseudocercospora dendrobii U. Braun and Crous"
|
96
|
+
parse(sn).should_not be_nil
|
97
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
98
|
+
sn = "Pseudocercospora dendrobii U. Braun et Crous"
|
99
|
+
parse(sn).should_not be_nil
|
100
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
70
101
|
end
|
71
102
|
|
72
103
|
it 'should parse several authors with a year' do
|
73
104
|
sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
|
74
105
|
parse(sn).should_not be_nil
|
75
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun
|
106
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous 2003"
|
76
107
|
canonical(sn).should == "Pseudocercospora dendrobii"
|
77
108
|
details(sn).should == {
|
78
109
|
:authors=>{:names=>["U. Braun","Crous"], :year => "2003"},
|
@@ -80,21 +111,29 @@ describe ScientificName do
|
|
80
111
|
:genus=>"Pseudocercospora"}
|
81
112
|
sn = "Pseudocercospora dendrobii Crous, 2003"
|
82
113
|
parse(sn).should_not be_nil
|
114
|
+
parse("Zophosis persis (Chatanay, 1914)").should_not be_nil
|
115
|
+
parse("Zophosis persis (Chatanay 1914)").should_not be_nil
|
116
|
+
parse("Zophosis persis (Chatanay), 1914").should_not be_nil
|
117
|
+
value("Zophosis persis (Chatanay), 1914").should == "Zophosis persis (Chatanay 1914)"
|
118
|
+
details("Zophosis persis (Chatanay), 1914").should == {:genus=>"Zophosis", :species=>"persis", :orig_authors=>{:names=>["Chatanay"]}, :year=>"1914"}
|
119
|
+
|
120
|
+
parse("Zophosis persis (Chatanay) 1914").should_not be_nil
|
121
|
+
#parse("Zophosis persis Chatanay (1914)").should_not be_nil
|
83
122
|
end
|
84
123
|
|
85
124
|
it 'should parse scientific name' do
|
86
125
|
parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
|
87
|
-
value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003"
|
126
|
+
value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun et Crous 2003"
|
88
127
|
canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
|
89
128
|
{:orig_authors=>{:names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
|
90
129
|
|
91
130
|
parse("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should_not be_nil
|
92
|
-
value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
|
131
|
+
value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas et Sousa da Câmara 1934"
|
93
132
|
details("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == {:authors=>{:year=>"1934", :names=>["M.T. Lucas", "Sousa da C\303\242mara"]}, :species=>"polyspora", :genus=>"Stagonospora"}
|
94
133
|
|
95
134
|
parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
|
96
135
|
parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
|
97
|
-
value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981"
|
136
|
+
value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman et E.A. Herrm.) Van der Walt et Arx 1981"
|
98
137
|
parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
|
99
138
|
parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
|
100
139
|
parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
|
@@ -114,11 +153,12 @@ describe ScientificName do
|
|
114
153
|
parse("Peltula coriacea Büdel, Henssen & Wessels 1986").should_not be_nil
|
115
154
|
#had to add no dot rule for trinomials without a rank to make it to work
|
116
155
|
parse("Saccharomyces drosophilae anon.").should_not be_nil
|
156
|
+
details("Saccharomyces drosophilae anon.").should == {:genus=>"Saccharomyces", :species=>"drosophilae", :authors=>{:names=>["anon."]}}
|
117
157
|
end
|
118
158
|
|
119
159
|
it 'should parse several authors with several years' do
|
120
160
|
parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003").should_not be_nil
|
121
|
-
value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
|
161
|
+
value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun et Crous 2003"
|
122
162
|
canonical("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
|
123
163
|
details("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == {:orig_authors=>{:year=>"1883", :names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
|
124
164
|
end
|
@@ -150,7 +190,7 @@ describe ScientificName do
|
|
150
190
|
|
151
191
|
it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
|
152
192
|
parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
|
153
|
-
value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
193
|
+
value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall et D.E. Stuntz 1972"
|
154
194
|
details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:rank=>"var.", :value=>"zonatum"}, {:rank=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
|
155
195
|
end
|
156
196
|
|
@@ -185,6 +225,7 @@ describe ScientificName do
|
|
185
225
|
#invalid but happens
|
186
226
|
parse("Mycosphaerella eryngii (Fr. Duby) ex Oudem. 1897").should_not be_nil
|
187
227
|
parse("Mycosphaerella eryngii (Fr.ex Duby) ex Oudem. 1897").should_not be_nil
|
228
|
+
parse("Salmonella werahensis (Castellani) Hauduroy and Ehringer in Hauduroy 1937").should_not be_nil
|
188
229
|
end
|
189
230
|
|
190
231
|
it "should parse multiplication sign" do
|
@@ -222,9 +263,12 @@ describe ScientificName do
|
|
222
263
|
it "should parse name with subspecies without rank NOT BOTANICAL" do
|
223
264
|
name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
224
265
|
parse(name).should_not be_nil
|
225
|
-
value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
266
|
+
value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972"
|
226
267
|
canonical(name).should == "Hydnellum scrobiculatum zonatum"
|
227
268
|
details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:rank=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
|
269
|
+
sp = "Begonia pingbienensis angustior"
|
270
|
+
parse(sp).should_not be_nil
|
271
|
+
details(sp).should == {:genus=>"Begonia", :species=>"pingbienensis", :subspecies=>{:rank=>"n/a", :value=>"angustior"}}
|
228
272
|
end
|
229
273
|
|
230
274
|
it "should not parse utf-8 chars in name part" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.10
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-04-11 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|