dimus-biodiversity 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/nnparse +10 -6
- data/lib/biodiversity/parser/scientific_name.rb +590 -135
- data/lib/biodiversity/parser/scientific_name.treetop +92 -18
- data/spec/parser/scientific_name.spec.rb +52 -8
- metadata +2 -2
data/bin/nnparse
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
require 'rubygems'
|
|
3
|
-
gem 'dimus-biodiversity' rescue gem 'biodiversity'
|
|
3
|
+
gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
|
|
4
4
|
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
6
6
|
require 'biodiversity'
|
|
@@ -37,11 +37,15 @@ IO.foreach(ARGV[0]) do |n|
|
|
|
37
37
|
puts n
|
|
38
38
|
count += 1
|
|
39
39
|
else
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
begin
|
|
41
|
+
name_dict[:output] = parsed.value
|
|
42
|
+
name_dict[:canononical] = parsed.canonical
|
|
43
|
+
name_dict[:details] = parsed.details
|
|
44
|
+
name_dict[:parsed => true]
|
|
45
|
+
last_result = JSON.generate name_dict
|
|
46
|
+
rescue
|
|
47
|
+
puts 'PROBLEM: ' + n
|
|
48
|
+
end
|
|
45
49
|
end
|
|
46
50
|
end
|
|
47
51
|
o.write(last_result + "\n") if last_result
|
|
@@ -741,6 +741,10 @@ module ScientificName
|
|
|
741
741
|
elements[1]
|
|
742
742
|
end
|
|
743
743
|
|
|
744
|
+
def ex_sep
|
|
745
|
+
elements[2]
|
|
746
|
+
end
|
|
747
|
+
|
|
744
748
|
def space
|
|
745
749
|
elements[3]
|
|
746
750
|
end
|
|
@@ -821,13 +825,7 @@ module ScientificName
|
|
|
821
825
|
r7 = _nt_space
|
|
822
826
|
s5 << r7
|
|
823
827
|
if r7
|
|
824
|
-
|
|
825
|
-
r8 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
826
|
-
@index += 2
|
|
827
|
-
else
|
|
828
|
-
terminal_parse_failure("ex")
|
|
829
|
-
r8 = nil
|
|
830
|
-
end
|
|
828
|
+
r8 = _nt_ex_sep
|
|
831
829
|
s5 << r8
|
|
832
830
|
if r8
|
|
833
831
|
r9 = _nt_space
|
|
@@ -984,9 +982,44 @@ module ScientificName
|
|
|
984
982
|
elements[3]
|
|
985
983
|
end
|
|
986
984
|
|
|
985
|
+
def space
|
|
986
|
+
elements[5]
|
|
987
|
+
end
|
|
988
|
+
|
|
989
|
+
def space
|
|
990
|
+
elements[7]
|
|
991
|
+
end
|
|
992
|
+
|
|
993
|
+
def b
|
|
994
|
+
elements[8]
|
|
995
|
+
end
|
|
987
996
|
end
|
|
988
997
|
|
|
989
998
|
module OriginalAuthorsNamesFull1
|
|
999
|
+
def value
|
|
1000
|
+
"(" + a.value + " " + b.value + ")"
|
|
1001
|
+
end
|
|
1002
|
+
def details
|
|
1003
|
+
{:orig_authors => a.details[:authors], :year => b.details[:year]}
|
|
1004
|
+
end
|
|
1005
|
+
end
|
|
1006
|
+
|
|
1007
|
+
module OriginalAuthorsNamesFull2
|
|
1008
|
+
def space
|
|
1009
|
+
elements[1]
|
|
1010
|
+
end
|
|
1011
|
+
|
|
1012
|
+
def a
|
|
1013
|
+
elements[2]
|
|
1014
|
+
end
|
|
1015
|
+
|
|
1016
|
+
def space
|
|
1017
|
+
elements[3]
|
|
1018
|
+
end
|
|
1019
|
+
|
|
1020
|
+
end
|
|
1021
|
+
|
|
1022
|
+
module OriginalAuthorsNamesFull3
|
|
990
1023
|
def value
|
|
991
1024
|
"(" + a.value + ")"
|
|
992
1025
|
end
|
|
@@ -1003,44 +1036,120 @@ module ScientificName
|
|
|
1003
1036
|
return cached
|
|
1004
1037
|
end
|
|
1005
1038
|
|
|
1006
|
-
i0
|
|
1039
|
+
i0 = index
|
|
1040
|
+
i1, s1 = index, []
|
|
1007
1041
|
if input.index("(", index) == index
|
|
1008
|
-
|
|
1042
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
1009
1043
|
@index += 1
|
|
1010
1044
|
else
|
|
1011
1045
|
terminal_parse_failure("(")
|
|
1012
|
-
|
|
1046
|
+
r2 = nil
|
|
1013
1047
|
end
|
|
1014
|
-
|
|
1015
|
-
if
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
if
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
if
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
if
|
|
1048
|
+
s1 << r2
|
|
1049
|
+
if r2
|
|
1050
|
+
r3 = _nt_space
|
|
1051
|
+
s1 << r3
|
|
1052
|
+
if r3
|
|
1053
|
+
r4 = _nt_authors_names
|
|
1054
|
+
s1 << r4
|
|
1055
|
+
if r4
|
|
1056
|
+
r5 = _nt_space
|
|
1057
|
+
s1 << r5
|
|
1058
|
+
if r5
|
|
1025
1059
|
if input.index(")", index) == index
|
|
1026
|
-
|
|
1060
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
1027
1061
|
@index += 1
|
|
1028
1062
|
else
|
|
1029
1063
|
terminal_parse_failure(")")
|
|
1030
|
-
|
|
1064
|
+
r6 = nil
|
|
1065
|
+
end
|
|
1066
|
+
s1 << r6
|
|
1067
|
+
if r6
|
|
1068
|
+
r7 = _nt_space
|
|
1069
|
+
s1 << r7
|
|
1070
|
+
if r7
|
|
1071
|
+
if input.index(Regexp.new('[,]'), index) == index
|
|
1072
|
+
r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
1073
|
+
@index += 1
|
|
1074
|
+
else
|
|
1075
|
+
r9 = nil
|
|
1076
|
+
end
|
|
1077
|
+
if r9
|
|
1078
|
+
r8 = r9
|
|
1079
|
+
else
|
|
1080
|
+
r8 = instantiate_node(SyntaxNode,input, index...index)
|
|
1081
|
+
end
|
|
1082
|
+
s1 << r8
|
|
1083
|
+
if r8
|
|
1084
|
+
r10 = _nt_space
|
|
1085
|
+
s1 << r10
|
|
1086
|
+
if r10
|
|
1087
|
+
r11 = _nt_year
|
|
1088
|
+
s1 << r11
|
|
1089
|
+
end
|
|
1090
|
+
end
|
|
1091
|
+
end
|
|
1031
1092
|
end
|
|
1032
|
-
s0 << r5
|
|
1033
1093
|
end
|
|
1034
1094
|
end
|
|
1035
1095
|
end
|
|
1036
1096
|
end
|
|
1037
|
-
if
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1097
|
+
if s1.last
|
|
1098
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
1099
|
+
r1.extend(OriginalAuthorsNamesFull0)
|
|
1100
|
+
r1.extend(OriginalAuthorsNamesFull1)
|
|
1041
1101
|
else
|
|
1042
|
-
self.index =
|
|
1043
|
-
|
|
1102
|
+
self.index = i1
|
|
1103
|
+
r1 = nil
|
|
1104
|
+
end
|
|
1105
|
+
if r1
|
|
1106
|
+
r0 = r1
|
|
1107
|
+
else
|
|
1108
|
+
i12, s12 = index, []
|
|
1109
|
+
if input.index("(", index) == index
|
|
1110
|
+
r13 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
1111
|
+
@index += 1
|
|
1112
|
+
else
|
|
1113
|
+
terminal_parse_failure("(")
|
|
1114
|
+
r13 = nil
|
|
1115
|
+
end
|
|
1116
|
+
s12 << r13
|
|
1117
|
+
if r13
|
|
1118
|
+
r14 = _nt_space
|
|
1119
|
+
s12 << r14
|
|
1120
|
+
if r14
|
|
1121
|
+
r15 = _nt_authors_names_full
|
|
1122
|
+
s12 << r15
|
|
1123
|
+
if r15
|
|
1124
|
+
r16 = _nt_space
|
|
1125
|
+
s12 << r16
|
|
1126
|
+
if r16
|
|
1127
|
+
if input.index(")", index) == index
|
|
1128
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
1129
|
+
@index += 1
|
|
1130
|
+
else
|
|
1131
|
+
terminal_parse_failure(")")
|
|
1132
|
+
r17 = nil
|
|
1133
|
+
end
|
|
1134
|
+
s12 << r17
|
|
1135
|
+
end
|
|
1136
|
+
end
|
|
1137
|
+
end
|
|
1138
|
+
end
|
|
1139
|
+
if s12.last
|
|
1140
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
|
1141
|
+
r12.extend(OriginalAuthorsNamesFull2)
|
|
1142
|
+
r12.extend(OriginalAuthorsNamesFull3)
|
|
1143
|
+
else
|
|
1144
|
+
self.index = i12
|
|
1145
|
+
r12 = nil
|
|
1146
|
+
end
|
|
1147
|
+
if r12
|
|
1148
|
+
r0 = r12
|
|
1149
|
+
else
|
|
1150
|
+
self.index = i0
|
|
1151
|
+
r0 = nil
|
|
1152
|
+
end
|
|
1044
1153
|
end
|
|
1045
1154
|
|
|
1046
1155
|
node_cache[:original_authors_names_full][start_index] = r0
|
|
@@ -1135,6 +1244,10 @@ module ScientificName
|
|
|
1135
1244
|
elements[1]
|
|
1136
1245
|
end
|
|
1137
1246
|
|
|
1247
|
+
def ex_sep
|
|
1248
|
+
elements[2]
|
|
1249
|
+
end
|
|
1250
|
+
|
|
1138
1251
|
def space
|
|
1139
1252
|
elements[3]
|
|
1140
1253
|
end
|
|
@@ -1168,13 +1281,7 @@ module ScientificName
|
|
|
1168
1281
|
r2 = _nt_space
|
|
1169
1282
|
s0 << r2
|
|
1170
1283
|
if r2
|
|
1171
|
-
|
|
1172
|
-
r3 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
1173
|
-
@index += 2
|
|
1174
|
-
else
|
|
1175
|
-
terminal_parse_failure("ex")
|
|
1176
|
-
r3 = nil
|
|
1177
|
-
end
|
|
1284
|
+
r3 = _nt_ex_sep
|
|
1178
1285
|
s0 << r3
|
|
1179
1286
|
if r3
|
|
1180
1287
|
r4 = _nt_space
|
|
@@ -1290,6 +1397,45 @@ module ScientificName
|
|
|
1290
1397
|
return r0
|
|
1291
1398
|
end
|
|
1292
1399
|
|
|
1400
|
+
def _nt_ex_sep
|
|
1401
|
+
start_index = index
|
|
1402
|
+
if node_cache[:ex_sep].has_key?(index)
|
|
1403
|
+
cached = node_cache[:ex_sep][index]
|
|
1404
|
+
@index = cached.interval.end if cached
|
|
1405
|
+
return cached
|
|
1406
|
+
end
|
|
1407
|
+
|
|
1408
|
+
i0 = index
|
|
1409
|
+
if input.index("ex", index) == index
|
|
1410
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
1411
|
+
@index += 2
|
|
1412
|
+
else
|
|
1413
|
+
terminal_parse_failure("ex")
|
|
1414
|
+
r1 = nil
|
|
1415
|
+
end
|
|
1416
|
+
if r1
|
|
1417
|
+
r0 = r1
|
|
1418
|
+
else
|
|
1419
|
+
if input.index("in", index) == index
|
|
1420
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
1421
|
+
@index += 2
|
|
1422
|
+
else
|
|
1423
|
+
terminal_parse_failure("in")
|
|
1424
|
+
r2 = nil
|
|
1425
|
+
end
|
|
1426
|
+
if r2
|
|
1427
|
+
r0 = r2
|
|
1428
|
+
else
|
|
1429
|
+
self.index = i0
|
|
1430
|
+
r0 = nil
|
|
1431
|
+
end
|
|
1432
|
+
end
|
|
1433
|
+
|
|
1434
|
+
node_cache[:ex_sep][start_index] = r0
|
|
1435
|
+
|
|
1436
|
+
return r0
|
|
1437
|
+
end
|
|
1438
|
+
|
|
1293
1439
|
module AuthorsNames0
|
|
1294
1440
|
def a
|
|
1295
1441
|
elements[0]
|
|
@@ -1378,7 +1524,7 @@ module ScientificName
|
|
|
1378
1524
|
module AuthorNameSeparator0
|
|
1379
1525
|
def apply(a,b)
|
|
1380
1526
|
sep = text_value.strip
|
|
1381
|
-
sep = " "
|
|
1527
|
+
sep = " et" if ["&","and","et"].include? sep
|
|
1382
1528
|
a.value + sep + " " + b.value
|
|
1383
1529
|
end
|
|
1384
1530
|
|
|
@@ -1418,8 +1564,32 @@ module ScientificName
|
|
|
1418
1564
|
r0 = r2
|
|
1419
1565
|
r0.extend(AuthorNameSeparator0)
|
|
1420
1566
|
else
|
|
1421
|
-
|
|
1422
|
-
|
|
1567
|
+
if input.index("and", index) == index
|
|
1568
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
1569
|
+
@index += 3
|
|
1570
|
+
else
|
|
1571
|
+
terminal_parse_failure("and")
|
|
1572
|
+
r3 = nil
|
|
1573
|
+
end
|
|
1574
|
+
if r3
|
|
1575
|
+
r0 = r3
|
|
1576
|
+
r0.extend(AuthorNameSeparator0)
|
|
1577
|
+
else
|
|
1578
|
+
if input.index("et", index) == index
|
|
1579
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
1580
|
+
@index += 2
|
|
1581
|
+
else
|
|
1582
|
+
terminal_parse_failure("et")
|
|
1583
|
+
r4 = nil
|
|
1584
|
+
end
|
|
1585
|
+
if r4
|
|
1586
|
+
r0 = r4
|
|
1587
|
+
r0.extend(AuthorNameSeparator0)
|
|
1588
|
+
else
|
|
1589
|
+
self.index = i0
|
|
1590
|
+
r0 = nil
|
|
1591
|
+
end
|
|
1592
|
+
end
|
|
1423
1593
|
end
|
|
1424
1594
|
end
|
|
1425
1595
|
|
|
@@ -2104,17 +2274,17 @@ module ScientificName
|
|
|
2104
2274
|
s13 << r17
|
|
2105
2275
|
if r17
|
|
2106
2276
|
i18 = index
|
|
2107
|
-
if input.index(Regexp.new('[
|
|
2277
|
+
if input.index(Regexp.new('[\\.]'), index) == index
|
|
2108
2278
|
r19 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
2109
2279
|
@index += 1
|
|
2110
2280
|
else
|
|
2111
2281
|
r19 = nil
|
|
2112
2282
|
end
|
|
2113
2283
|
if r19
|
|
2284
|
+
r18 = nil
|
|
2285
|
+
else
|
|
2114
2286
|
self.index = i18
|
|
2115
2287
|
r18 = instantiate_node(SyntaxNode,input, index...index)
|
|
2116
|
-
else
|
|
2117
|
-
r18 = nil
|
|
2118
2288
|
end
|
|
2119
2289
|
s13 << r18
|
|
2120
2290
|
end
|
|
@@ -2392,6 +2562,9 @@ module ScientificName
|
|
|
2392
2562
|
def value
|
|
2393
2563
|
a.value + b.value
|
|
2394
2564
|
end
|
|
2565
|
+
def details
|
|
2566
|
+
{:editorial_markup => value, :is_valid => false}
|
|
2567
|
+
end
|
|
2395
2568
|
end
|
|
2396
2569
|
|
|
2397
2570
|
def _nt_editorials
|
|
@@ -3277,25 +3450,40 @@ module ScientificName
|
|
|
3277
3450
|
return r0
|
|
3278
3451
|
end
|
|
3279
3452
|
|
|
3280
|
-
module
|
|
3453
|
+
module CapLatinWord0
|
|
3454
|
+
def a
|
|
3455
|
+
elements[0]
|
|
3456
|
+
end
|
|
3457
|
+
|
|
3458
|
+
def b
|
|
3459
|
+
elements[1]
|
|
3460
|
+
end
|
|
3281
3461
|
end
|
|
3282
3462
|
|
|
3283
|
-
module
|
|
3284
|
-
def value
|
|
3285
|
-
text_value.
|
|
3463
|
+
module CapLatinWord1
|
|
3464
|
+
def value
|
|
3465
|
+
a.text_value + b.value
|
|
3466
|
+
end
|
|
3467
|
+
|
|
3468
|
+
def canonical
|
|
3469
|
+
value
|
|
3470
|
+
end
|
|
3471
|
+
|
|
3472
|
+
def details
|
|
3473
|
+
{:uninomial => value}
|
|
3286
3474
|
end
|
|
3287
3475
|
end
|
|
3288
3476
|
|
|
3289
|
-
def
|
|
3477
|
+
def _nt_cap_latin_word
|
|
3290
3478
|
start_index = index
|
|
3291
|
-
if node_cache[:
|
|
3292
|
-
cached = node_cache[:
|
|
3479
|
+
if node_cache[:cap_latin_word].has_key?(index)
|
|
3480
|
+
cached = node_cache[:cap_latin_word][index]
|
|
3293
3481
|
@index = cached.interval.end if cached
|
|
3294
3482
|
return cached
|
|
3295
3483
|
end
|
|
3296
3484
|
|
|
3297
3485
|
i0, s0 = index, []
|
|
3298
|
-
if input.index(Regexp.new('[
|
|
3486
|
+
if input.index(Regexp.new('[A-Z]'), index) == index
|
|
3299
3487
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3300
3488
|
@index += 1
|
|
3301
3489
|
else
|
|
@@ -3303,117 +3491,303 @@ module ScientificName
|
|
|
3303
3491
|
end
|
|
3304
3492
|
s0 << r1
|
|
3305
3493
|
if r1
|
|
3306
|
-
|
|
3307
|
-
loop do
|
|
3308
|
-
if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
|
|
3309
|
-
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3310
|
-
@index += 1
|
|
3311
|
-
else
|
|
3312
|
-
r3 = nil
|
|
3313
|
-
end
|
|
3314
|
-
if r3
|
|
3315
|
-
s2 << r3
|
|
3316
|
-
else
|
|
3317
|
-
break
|
|
3318
|
-
end
|
|
3319
|
-
end
|
|
3320
|
-
if s2.empty?
|
|
3321
|
-
self.index = i2
|
|
3322
|
-
r2 = nil
|
|
3323
|
-
else
|
|
3324
|
-
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
|
3325
|
-
end
|
|
3494
|
+
r2 = _nt_latin_word
|
|
3326
3495
|
s0 << r2
|
|
3327
3496
|
end
|
|
3328
3497
|
if s0.last
|
|
3329
3498
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
3330
|
-
r0.extend(
|
|
3331
|
-
r0.extend(
|
|
3499
|
+
r0.extend(CapLatinWord0)
|
|
3500
|
+
r0.extend(CapLatinWord1)
|
|
3332
3501
|
else
|
|
3333
3502
|
self.index = i0
|
|
3334
3503
|
r0 = nil
|
|
3335
3504
|
end
|
|
3336
3505
|
|
|
3337
|
-
node_cache[:
|
|
3506
|
+
node_cache[:cap_latin_word][start_index] = r0
|
|
3338
3507
|
|
|
3339
3508
|
return r0
|
|
3340
3509
|
end
|
|
3341
3510
|
|
|
3342
|
-
module
|
|
3511
|
+
module LatinWord0
|
|
3512
|
+
def a
|
|
3513
|
+
elements[0]
|
|
3514
|
+
end
|
|
3515
|
+
|
|
3516
|
+
def b
|
|
3517
|
+
elements[1]
|
|
3518
|
+
end
|
|
3343
3519
|
end
|
|
3344
3520
|
|
|
3345
|
-
module
|
|
3521
|
+
module LatinWord1
|
|
3346
3522
|
def value
|
|
3347
|
-
text_value.
|
|
3348
|
-
end
|
|
3349
|
-
|
|
3350
|
-
def canonical
|
|
3351
|
-
text_value.strip
|
|
3352
|
-
end
|
|
3353
|
-
|
|
3354
|
-
def details
|
|
3355
|
-
{:uninomial => value}
|
|
3523
|
+
a.text_value + b.value
|
|
3356
3524
|
end
|
|
3357
3525
|
end
|
|
3358
3526
|
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
cached = node_cache[:cap_latin_word][index]
|
|
3363
|
-
@index = cached.interval.end if cached
|
|
3364
|
-
return cached
|
|
3527
|
+
module LatinWord2
|
|
3528
|
+
def a
|
|
3529
|
+
elements[0]
|
|
3365
3530
|
end
|
|
3366
3531
|
|
|
3367
|
-
|
|
3368
|
-
|
|
3369
|
-
|
|
3532
|
+
def b
|
|
3533
|
+
elements[1]
|
|
3534
|
+
end
|
|
3535
|
+
end
|
|
3536
|
+
|
|
3537
|
+
module LatinWord3
|
|
3538
|
+
def value
|
|
3539
|
+
a.value + b.value
|
|
3540
|
+
end
|
|
3541
|
+
end
|
|
3542
|
+
|
|
3543
|
+
def _nt_latin_word
|
|
3544
|
+
start_index = index
|
|
3545
|
+
if node_cache[:latin_word].has_key?(index)
|
|
3546
|
+
cached = node_cache[:latin_word][index]
|
|
3547
|
+
@index = cached.interval.end if cached
|
|
3548
|
+
return cached
|
|
3549
|
+
end
|
|
3550
|
+
|
|
3551
|
+
i0 = index
|
|
3552
|
+
i1, s1 = index, []
|
|
3553
|
+
if input.index(Regexp.new('[a-zë]'), index) == index
|
|
3554
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3370
3555
|
@index += 1
|
|
3371
3556
|
else
|
|
3557
|
+
r2 = nil
|
|
3558
|
+
end
|
|
3559
|
+
s1 << r2
|
|
3560
|
+
if r2
|
|
3561
|
+
r3 = _nt_full_name_letters
|
|
3562
|
+
s1 << r3
|
|
3563
|
+
end
|
|
3564
|
+
if s1.last
|
|
3565
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
3566
|
+
r1.extend(LatinWord0)
|
|
3567
|
+
r1.extend(LatinWord1)
|
|
3568
|
+
else
|
|
3569
|
+
self.index = i1
|
|
3372
3570
|
r1 = nil
|
|
3373
3571
|
end
|
|
3374
|
-
s0 << r1
|
|
3375
3572
|
if r1
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3573
|
+
r0 = r1
|
|
3574
|
+
else
|
|
3575
|
+
i4, s4 = index, []
|
|
3576
|
+
r5 = _nt_digraph
|
|
3577
|
+
s4 << r5
|
|
3578
|
+
if r5
|
|
3579
|
+
r6 = _nt_full_name_letters
|
|
3580
|
+
s4 << r6
|
|
3581
|
+
end
|
|
3582
|
+
if s4.last
|
|
3583
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
3584
|
+
r4.extend(LatinWord2)
|
|
3585
|
+
r4.extend(LatinWord3)
|
|
3379
3586
|
else
|
|
3380
|
-
|
|
3587
|
+
self.index = i4
|
|
3588
|
+
r4 = nil
|
|
3381
3589
|
end
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3590
|
+
if r4
|
|
3591
|
+
r0 = r4
|
|
3592
|
+
else
|
|
3593
|
+
self.index = i0
|
|
3594
|
+
r0 = nil
|
|
3595
|
+
end
|
|
3596
|
+
end
|
|
3597
|
+
|
|
3598
|
+
node_cache[:latin_word][start_index] = r0
|
|
3599
|
+
|
|
3600
|
+
return r0
|
|
3601
|
+
end
|
|
3602
|
+
|
|
3603
|
+
module FullNameLetters0
|
|
3604
|
+
def a
|
|
3605
|
+
elements[0]
|
|
3606
|
+
end
|
|
3607
|
+
|
|
3608
|
+
def b
|
|
3609
|
+
elements[1]
|
|
3610
|
+
end
|
|
3611
|
+
end
|
|
3612
|
+
|
|
3613
|
+
module FullNameLetters1
|
|
3614
|
+
def value
|
|
3615
|
+
a.value + b.value
|
|
3616
|
+
end
|
|
3617
|
+
end
|
|
3618
|
+
|
|
3619
|
+
module FullNameLetters2
|
|
3620
|
+
def a
|
|
3621
|
+
elements[0]
|
|
3622
|
+
end
|
|
3623
|
+
|
|
3624
|
+
def b
|
|
3625
|
+
elements[1]
|
|
3626
|
+
end
|
|
3627
|
+
|
|
3628
|
+
def c
|
|
3629
|
+
elements[2]
|
|
3630
|
+
end
|
|
3631
|
+
end
|
|
3632
|
+
|
|
3633
|
+
module FullNameLetters3
|
|
3634
|
+
def value
|
|
3635
|
+
a.value + b.value + c.value
|
|
3636
|
+
end
|
|
3637
|
+
end
|
|
3638
|
+
|
|
3639
|
+
def _nt_full_name_letters
|
|
3640
|
+
start_index = index
|
|
3641
|
+
if node_cache[:full_name_letters].has_key?(index)
|
|
3642
|
+
cached = node_cache[:full_name_letters][index]
|
|
3643
|
+
@index = cached.interval.end if cached
|
|
3644
|
+
return cached
|
|
3645
|
+
end
|
|
3646
|
+
|
|
3647
|
+
i0 = index
|
|
3648
|
+
i1, s1 = index, []
|
|
3649
|
+
r2 = _nt_digraph
|
|
3650
|
+
s1 << r2
|
|
3651
|
+
if r2
|
|
3652
|
+
r3 = _nt_full_name_letters
|
|
3653
|
+
s1 << r3
|
|
3654
|
+
end
|
|
3655
|
+
if s1.last
|
|
3656
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
3657
|
+
r1.extend(FullNameLetters0)
|
|
3658
|
+
r1.extend(FullNameLetters1)
|
|
3659
|
+
else
|
|
3660
|
+
self.index = i1
|
|
3661
|
+
r1 = nil
|
|
3662
|
+
end
|
|
3663
|
+
if r1
|
|
3664
|
+
r0 = r1
|
|
3665
|
+
else
|
|
3666
|
+
i4, s4 = index, []
|
|
3667
|
+
r5 = _nt_valid_name_letters
|
|
3668
|
+
s4 << r5
|
|
3669
|
+
if r5
|
|
3670
|
+
r6 = _nt_digraph
|
|
3671
|
+
s4 << r6
|
|
3672
|
+
if r6
|
|
3673
|
+
r7 = _nt_full_name_letters
|
|
3674
|
+
s4 << r7
|
|
3397
3675
|
end
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3676
|
+
end
|
|
3677
|
+
if s4.last
|
|
3678
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
3679
|
+
r4.extend(FullNameLetters2)
|
|
3680
|
+
r4.extend(FullNameLetters3)
|
|
3681
|
+
else
|
|
3682
|
+
self.index = i4
|
|
3683
|
+
r4 = nil
|
|
3684
|
+
end
|
|
3685
|
+
if r4
|
|
3686
|
+
r0 = r4
|
|
3687
|
+
else
|
|
3688
|
+
r8 = _nt_valid_name_letters
|
|
3689
|
+
if r8
|
|
3690
|
+
r0 = r8
|
|
3401
3691
|
else
|
|
3402
|
-
|
|
3692
|
+
self.index = i0
|
|
3693
|
+
r0 = nil
|
|
3403
3694
|
end
|
|
3404
|
-
s0 << r3
|
|
3405
3695
|
end
|
|
3406
3696
|
end
|
|
3407
|
-
|
|
3408
|
-
|
|
3409
|
-
|
|
3410
|
-
|
|
3411
|
-
|
|
3697
|
+
|
|
3698
|
+
node_cache[:full_name_letters][start_index] = r0
|
|
3699
|
+
|
|
3700
|
+
return r0
|
|
3701
|
+
end
|
|
3702
|
+
|
|
3703
|
+
module ValidNameLetters0
|
|
3704
|
+
def value
|
|
3705
|
+
text_value
|
|
3706
|
+
end
|
|
3707
|
+
end
|
|
3708
|
+
|
|
3709
|
+
def _nt_valid_name_letters
|
|
3710
|
+
start_index = index
|
|
3711
|
+
if node_cache[:valid_name_letters].has_key?(index)
|
|
3712
|
+
cached = node_cache[:valid_name_letters][index]
|
|
3713
|
+
@index = cached.interval.end if cached
|
|
3714
|
+
return cached
|
|
3715
|
+
end
|
|
3716
|
+
|
|
3717
|
+
s0, i0 = [], index
|
|
3718
|
+
loop do
|
|
3719
|
+
if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
|
|
3720
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3721
|
+
@index += 1
|
|
3722
|
+
else
|
|
3723
|
+
r1 = nil
|
|
3724
|
+
end
|
|
3725
|
+
if r1
|
|
3726
|
+
s0 << r1
|
|
3727
|
+
else
|
|
3728
|
+
break
|
|
3729
|
+
end
|
|
3730
|
+
end
|
|
3731
|
+
if s0.empty?
|
|
3412
3732
|
self.index = i0
|
|
3413
3733
|
r0 = nil
|
|
3734
|
+
else
|
|
3735
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
3736
|
+
r0.extend(ValidNameLetters0)
|
|
3414
3737
|
end
|
|
3415
3738
|
|
|
3416
|
-
node_cache[:
|
|
3739
|
+
node_cache[:valid_name_letters][start_index] = r0
|
|
3740
|
+
|
|
3741
|
+
return r0
|
|
3742
|
+
end
|
|
3743
|
+
|
|
3744
|
+
module Digraph0
|
|
3745
|
+
def value
|
|
3746
|
+
'ae'
|
|
3747
|
+
end
|
|
3748
|
+
end
|
|
3749
|
+
|
|
3750
|
+
module Digraph1
|
|
3751
|
+
def value
|
|
3752
|
+
'oe'
|
|
3753
|
+
end
|
|
3754
|
+
end
|
|
3755
|
+
|
|
3756
|
+
def _nt_digraph
|
|
3757
|
+
start_index = index
|
|
3758
|
+
if node_cache[:digraph].has_key?(index)
|
|
3759
|
+
cached = node_cache[:digraph][index]
|
|
3760
|
+
@index = cached.interval.end if cached
|
|
3761
|
+
return cached
|
|
3762
|
+
end
|
|
3763
|
+
|
|
3764
|
+
i0 = index
|
|
3765
|
+
if input.index(Regexp.new('[æ]'), index) == index
|
|
3766
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3767
|
+
r1.extend(Digraph0)
|
|
3768
|
+
@index += 1
|
|
3769
|
+
else
|
|
3770
|
+
r1 = nil
|
|
3771
|
+
end
|
|
3772
|
+
if r1
|
|
3773
|
+
r0 = r1
|
|
3774
|
+
else
|
|
3775
|
+
if input.index(Regexp.new('[œ]'), index) == index
|
|
3776
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3777
|
+
r2.extend(Digraph1)
|
|
3778
|
+
@index += 1
|
|
3779
|
+
else
|
|
3780
|
+
r2 = nil
|
|
3781
|
+
end
|
|
3782
|
+
if r2
|
|
3783
|
+
r0 = r2
|
|
3784
|
+
else
|
|
3785
|
+
self.index = i0
|
|
3786
|
+
r0 = nil
|
|
3787
|
+
end
|
|
3788
|
+
end
|
|
3789
|
+
|
|
3790
|
+
node_cache[:digraph][start_index] = r0
|
|
3417
3791
|
|
|
3418
3792
|
return r0
|
|
3419
3793
|
end
|
|
@@ -3494,29 +3868,110 @@ module ScientificName
|
|
|
3494
3868
|
return cached
|
|
3495
3869
|
end
|
|
3496
3870
|
|
|
3497
|
-
|
|
3871
|
+
i0 = index
|
|
3872
|
+
r1 = _nt_year_with_character
|
|
3873
|
+
if r1
|
|
3874
|
+
r0 = r1
|
|
3875
|
+
else
|
|
3876
|
+
s2, i2 = [], index
|
|
3877
|
+
loop do
|
|
3878
|
+
if input.index(Regexp.new('[0-9\\?]'), index) == index
|
|
3879
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3880
|
+
@index += 1
|
|
3881
|
+
else
|
|
3882
|
+
r3 = nil
|
|
3883
|
+
end
|
|
3884
|
+
if r3
|
|
3885
|
+
s2 << r3
|
|
3886
|
+
else
|
|
3887
|
+
break
|
|
3888
|
+
end
|
|
3889
|
+
end
|
|
3890
|
+
if s2.empty?
|
|
3891
|
+
self.index = i2
|
|
3892
|
+
r2 = nil
|
|
3893
|
+
else
|
|
3894
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
|
3895
|
+
r2.extend(Year0)
|
|
3896
|
+
end
|
|
3897
|
+
if r2
|
|
3898
|
+
r0 = r2
|
|
3899
|
+
else
|
|
3900
|
+
self.index = i0
|
|
3901
|
+
r0 = nil
|
|
3902
|
+
end
|
|
3903
|
+
end
|
|
3904
|
+
|
|
3905
|
+
node_cache[:year][start_index] = r0
|
|
3906
|
+
|
|
3907
|
+
return r0
|
|
3908
|
+
end
|
|
3909
|
+
|
|
3910
|
+
module YearWithCharacter0
|
|
3911
|
+
def a
|
|
3912
|
+
elements[0]
|
|
3913
|
+
end
|
|
3914
|
+
|
|
3915
|
+
end
|
|
3916
|
+
|
|
3917
|
+
module YearWithCharacter1
|
|
3918
|
+
def value
|
|
3919
|
+
a.text_value
|
|
3920
|
+
end
|
|
3921
|
+
def details
|
|
3922
|
+
{:year => value}
|
|
3923
|
+
end
|
|
3924
|
+
end
|
|
3925
|
+
|
|
3926
|
+
def _nt_year_with_character
|
|
3927
|
+
start_index = index
|
|
3928
|
+
if node_cache[:year_with_character].has_key?(index)
|
|
3929
|
+
cached = node_cache[:year_with_character][index]
|
|
3930
|
+
@index = cached.interval.end if cached
|
|
3931
|
+
return cached
|
|
3932
|
+
end
|
|
3933
|
+
|
|
3934
|
+
i0, s0 = index, []
|
|
3935
|
+
s1, i1 = [], index
|
|
3498
3936
|
loop do
|
|
3499
3937
|
if input.index(Regexp.new('[0-9\\?]'), index) == index
|
|
3500
|
-
|
|
3938
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3501
3939
|
@index += 1
|
|
3502
3940
|
else
|
|
3503
|
-
|
|
3941
|
+
r2 = nil
|
|
3504
3942
|
end
|
|
3505
|
-
if
|
|
3506
|
-
|
|
3943
|
+
if r2
|
|
3944
|
+
s1 << r2
|
|
3507
3945
|
else
|
|
3508
3946
|
break
|
|
3509
3947
|
end
|
|
3510
3948
|
end
|
|
3511
|
-
if
|
|
3512
|
-
self.index =
|
|
3513
|
-
|
|
3949
|
+
if s1.empty?
|
|
3950
|
+
self.index = i1
|
|
3951
|
+
r1 = nil
|
|
3514
3952
|
else
|
|
3953
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
3954
|
+
end
|
|
3955
|
+
s0 << r1
|
|
3956
|
+
if r1
|
|
3957
|
+
if input.index(Regexp.new('[a-zA-Z]'), index) == index
|
|
3958
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3959
|
+
@index += 1
|
|
3960
|
+
else
|
|
3961
|
+
r3 = nil
|
|
3962
|
+
end
|
|
3963
|
+
s0 << r3
|
|
3964
|
+
end
|
|
3965
|
+
if s0.last
|
|
3515
3966
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
3516
|
-
r0.extend(
|
|
3967
|
+
r0.extend(YearWithCharacter0)
|
|
3968
|
+
r0.extend(YearWithCharacter1)
|
|
3969
|
+
else
|
|
3970
|
+
self.index = i0
|
|
3971
|
+
r0 = nil
|
|
3517
3972
|
end
|
|
3518
3973
|
|
|
3519
|
-
node_cache[:
|
|
3974
|
+
node_cache[:year_with_character][start_index] = r0
|
|
3520
3975
|
|
|
3521
3976
|
return r0
|
|
3522
3977
|
end
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
1
2
|
grammar ScientificName
|
|
2
3
|
|
|
3
4
|
rule composite_scientific_name
|
|
@@ -27,7 +28,7 @@ grammar ScientificName
|
|
|
27
28
|
end
|
|
28
29
|
}
|
|
29
30
|
/
|
|
30
|
-
scientific_name
|
|
31
|
+
scientific_name
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
rule scientific_name
|
|
@@ -137,7 +138,7 @@ grammar ScientificName
|
|
|
137
138
|
end
|
|
138
139
|
}
|
|
139
140
|
/
|
|
140
|
-
a:simple_authors_part space
|
|
141
|
+
a:simple_authors_part space ex_sep space b:simple_authors_part {
|
|
141
142
|
def value
|
|
142
143
|
a.value + " ex " + b.value
|
|
143
144
|
end
|
|
@@ -179,6 +180,15 @@ grammar ScientificName
|
|
|
179
180
|
end
|
|
180
181
|
|
|
181
182
|
rule original_authors_names_full
|
|
183
|
+
"(" space a:authors_names space ")" space [,]? space b:year {
|
|
184
|
+
def value
|
|
185
|
+
"(" + a.value + " " + b.value + ")"
|
|
186
|
+
end
|
|
187
|
+
def details
|
|
188
|
+
{:orig_authors => a.details[:authors], :year => b.details[:year]}
|
|
189
|
+
end
|
|
190
|
+
}
|
|
191
|
+
/
|
|
182
192
|
"(" space a:authors_names_full space ")" {
|
|
183
193
|
def value
|
|
184
194
|
"(" + a.value + ")"
|
|
@@ -202,7 +212,7 @@ grammar ScientificName
|
|
|
202
212
|
end
|
|
203
213
|
|
|
204
214
|
rule authors_revised_name
|
|
205
|
-
a:authors_names_full space
|
|
215
|
+
a:authors_names_full space ex_sep space b:authors_names_full {
|
|
206
216
|
def value
|
|
207
217
|
a.value + " ex " + b.value
|
|
208
218
|
end
|
|
@@ -224,6 +234,10 @@ grammar ScientificName
|
|
|
224
234
|
/
|
|
225
235
|
authors_names
|
|
226
236
|
end
|
|
237
|
+
|
|
238
|
+
rule ex_sep
|
|
239
|
+
("ex"/"in")
|
|
240
|
+
end
|
|
227
241
|
|
|
228
242
|
rule authors_names
|
|
229
243
|
a:author_name space sep:author_name_separator space b:authors_names {
|
|
@@ -240,10 +254,10 @@ grammar ScientificName
|
|
|
240
254
|
end
|
|
241
255
|
|
|
242
256
|
rule author_name_separator
|
|
243
|
-
("&"/",") {
|
|
257
|
+
("&"/","/"and"/"et") {
|
|
244
258
|
def apply(a,b)
|
|
245
259
|
sep = text_value.strip
|
|
246
|
-
sep = " "
|
|
260
|
+
sep = " et" if ["&","and","et"].include? sep
|
|
247
261
|
a.value + sep + " " + b.value
|
|
248
262
|
end
|
|
249
263
|
|
|
@@ -315,7 +329,7 @@ grammar ScientificName
|
|
|
315
329
|
end
|
|
316
330
|
}
|
|
317
331
|
/
|
|
318
|
-
space a:species_name space b:latin_word
|
|
332
|
+
space a:species_name space b:latin_word ![\.] {
|
|
319
333
|
def value
|
|
320
334
|
a.value + " " + b.value
|
|
321
335
|
end
|
|
@@ -385,6 +399,9 @@ grammar ScientificName
|
|
|
385
399
|
space a:rank space [&]? space b:editorials {
|
|
386
400
|
def value
|
|
387
401
|
a.value + b.value
|
|
402
|
+
end
|
|
403
|
+
def details
|
|
404
|
+
{:editorial_markup => value, :is_valid => false}
|
|
388
405
|
end
|
|
389
406
|
}
|
|
390
407
|
/
|
|
@@ -483,23 +500,15 @@ grammar ScientificName
|
|
|
483
500
|
end
|
|
484
501
|
}
|
|
485
502
|
end
|
|
486
|
-
|
|
487
|
-
rule latin_word
|
|
488
|
-
[a-zë] [a-z\-ëüäöï]+ {
|
|
489
|
-
def value
|
|
490
|
-
text_value.strip
|
|
491
|
-
end
|
|
492
|
-
}
|
|
493
|
-
end
|
|
494
503
|
|
|
495
504
|
rule cap_latin_word
|
|
496
|
-
[A-Z]
|
|
505
|
+
a:[A-Z] b:latin_word {
|
|
497
506
|
def value
|
|
498
|
-
text_value.
|
|
507
|
+
a.text_value + b.value
|
|
499
508
|
end
|
|
500
509
|
|
|
501
510
|
def canonical
|
|
502
|
-
|
|
511
|
+
value
|
|
503
512
|
end
|
|
504
513
|
|
|
505
514
|
def details
|
|
@@ -507,7 +516,59 @@ grammar ScientificName
|
|
|
507
516
|
end
|
|
508
517
|
}
|
|
509
518
|
end
|
|
519
|
+
|
|
520
|
+
rule latin_word
|
|
521
|
+
a:[a-zë] b:full_name_letters {
|
|
522
|
+
def value
|
|
523
|
+
a.text_value + b.value
|
|
524
|
+
end
|
|
525
|
+
}
|
|
526
|
+
/
|
|
527
|
+
a:digraph b:full_name_letters {
|
|
528
|
+
def value
|
|
529
|
+
a.value + b.value
|
|
530
|
+
end
|
|
531
|
+
}
|
|
532
|
+
end
|
|
510
533
|
|
|
534
|
+
rule full_name_letters
|
|
535
|
+
a:digraph b:full_name_letters {
|
|
536
|
+
def value
|
|
537
|
+
a.value + b.value
|
|
538
|
+
end
|
|
539
|
+
}
|
|
540
|
+
/
|
|
541
|
+
a:valid_name_letters b:digraph c:full_name_letters {
|
|
542
|
+
def value
|
|
543
|
+
a.value + b.value + c.value
|
|
544
|
+
end
|
|
545
|
+
}
|
|
546
|
+
/
|
|
547
|
+
valid_name_letters
|
|
548
|
+
end
|
|
549
|
+
|
|
550
|
+
rule valid_name_letters
|
|
551
|
+
[a-z\-ëüäöï]+ {
|
|
552
|
+
def value
|
|
553
|
+
text_value
|
|
554
|
+
end
|
|
555
|
+
}
|
|
556
|
+
end
|
|
557
|
+
|
|
558
|
+
rule digraph
|
|
559
|
+
[æ] {
|
|
560
|
+
def value
|
|
561
|
+
'ae'
|
|
562
|
+
end
|
|
563
|
+
}
|
|
564
|
+
/
|
|
565
|
+
[œ] {
|
|
566
|
+
def value
|
|
567
|
+
'oe'
|
|
568
|
+
end
|
|
569
|
+
}
|
|
570
|
+
end
|
|
571
|
+
|
|
511
572
|
rule hybrid_separator
|
|
512
573
|
("x"/"X"/"×") {
|
|
513
574
|
def value
|
|
@@ -517,6 +578,8 @@ grammar ScientificName
|
|
|
517
578
|
end
|
|
518
579
|
|
|
519
580
|
rule year
|
|
581
|
+
year_with_character
|
|
582
|
+
/
|
|
520
583
|
[0-9\?]+ {
|
|
521
584
|
def value
|
|
522
585
|
text_value.strip
|
|
@@ -526,7 +589,18 @@ grammar ScientificName
|
|
|
526
589
|
end
|
|
527
590
|
}
|
|
528
591
|
end
|
|
529
|
-
|
|
592
|
+
|
|
593
|
+
rule year_with_character
|
|
594
|
+
a:[0-9\?]+ [a-zA-Z] {
|
|
595
|
+
def value
|
|
596
|
+
a.text_value
|
|
597
|
+
end
|
|
598
|
+
def details
|
|
599
|
+
{:year => value}
|
|
600
|
+
end
|
|
601
|
+
}
|
|
602
|
+
end
|
|
603
|
+
|
|
530
604
|
rule space
|
|
531
605
|
[\s]*
|
|
532
606
|
end
|
|
@@ -51,6 +51,31 @@ describe ScientificName do
|
|
|
51
51
|
details(sn).should == {:subgenus=>"Amerigo", :authors=>{:year=>"1999", :names=>["Author"]}, :species=>"pealeii", :genus=>"Doriteuthis"}
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
it 'should parse æ in the name' do
|
|
55
|
+
names = [
|
|
56
|
+
["Læptura laetifica Dow, 1913", "Laeptura laetifica Dow 1913"],
|
|
57
|
+
["Leptura lætifica Dow, 1913", "Leptura laetifica Dow 1913"],
|
|
58
|
+
["Leptura leætifica Dow, 1913", "Leptura leaetifica Dow 1913"],
|
|
59
|
+
["Leæptura laetifica Dow, 1913", "Leaeptura laetifica Dow 1913"],
|
|
60
|
+
["Leœptura laetifica Dow, 1913", "Leoeptura laetifica Dow 1913"]
|
|
61
|
+
]
|
|
62
|
+
names.each do |name_pair|
|
|
63
|
+
parse(name_pair[0]).should_not be_nil
|
|
64
|
+
value(name_pair[0]).should == name_pair[1]
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it 'should parse year' do
|
|
69
|
+
sn = "Platypus bicaudatulus Schedl 1935"
|
|
70
|
+
parse(sn).should_not be_nil
|
|
71
|
+
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
|
72
|
+
sn = "Platypus bicaudatulus Schedl, 1935h"
|
|
73
|
+
parse(sn).should_not be_nil
|
|
74
|
+
value(sn).should == "Platypus bicaudatulus Schedl 1935"
|
|
75
|
+
details(sn).should == {:genus=>"Platypus", :species=>"bicaudatulus", :authors=>{:names=>["Schedl"], :year=>"1935"}}
|
|
76
|
+
parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
|
|
77
|
+
end
|
|
78
|
+
|
|
54
79
|
it 'should parse species autonym for complex subspecies authorships' do
|
|
55
80
|
parse("Aus bus Linn. var. bus").should_not be_nil
|
|
56
81
|
details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
|
|
@@ -61,18 +86,24 @@ describe ScientificName do
|
|
|
61
86
|
it 'should parse several authors' do
|
|
62
87
|
sn = "Pseudocercospora dendrobii U. Braun & Crous"
|
|
63
88
|
parse(sn).should_not be_nil
|
|
64
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
|
|
89
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
|
65
90
|
canonical(sn).should == "Pseudocercospora dendrobii"
|
|
66
91
|
details(sn).should == {
|
|
67
92
|
:authors=>{:names=>["U. Braun","Crous"]},
|
|
68
93
|
:species=>"dendrobii",
|
|
69
94
|
:genus=>"Pseudocercospora"}
|
|
95
|
+
sn = "Pseudocercospora dendrobii U. Braun and Crous"
|
|
96
|
+
parse(sn).should_not be_nil
|
|
97
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
|
98
|
+
sn = "Pseudocercospora dendrobii U. Braun et Crous"
|
|
99
|
+
parse(sn).should_not be_nil
|
|
100
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
|
|
70
101
|
end
|
|
71
102
|
|
|
72
103
|
it 'should parse several authors with a year' do
|
|
73
104
|
sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
|
|
74
105
|
parse(sn).should_not be_nil
|
|
75
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous 2003"
|
|
106
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous 2003"
|
|
76
107
|
canonical(sn).should == "Pseudocercospora dendrobii"
|
|
77
108
|
details(sn).should == {
|
|
78
109
|
:authors=>{:names=>["U. Braun","Crous"], :year => "2003"},
|
|
@@ -80,21 +111,29 @@ describe ScientificName do
|
|
|
80
111
|
:genus=>"Pseudocercospora"}
|
|
81
112
|
sn = "Pseudocercospora dendrobii Crous, 2003"
|
|
82
113
|
parse(sn).should_not be_nil
|
|
114
|
+
parse("Zophosis persis (Chatanay, 1914)").should_not be_nil
|
|
115
|
+
parse("Zophosis persis (Chatanay 1914)").should_not be_nil
|
|
116
|
+
parse("Zophosis persis (Chatanay), 1914").should_not be_nil
|
|
117
|
+
value("Zophosis persis (Chatanay), 1914").should == "Zophosis persis (Chatanay 1914)"
|
|
118
|
+
details("Zophosis persis (Chatanay), 1914").should == {:genus=>"Zophosis", :species=>"persis", :orig_authors=>{:names=>["Chatanay"]}, :year=>"1914"}
|
|
119
|
+
|
|
120
|
+
parse("Zophosis persis (Chatanay) 1914").should_not be_nil
|
|
121
|
+
#parse("Zophosis persis Chatanay (1914)").should_not be_nil
|
|
83
122
|
end
|
|
84
123
|
|
|
85
124
|
it 'should parse scientific name' do
|
|
86
125
|
parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
|
|
87
|
-
value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003"
|
|
126
|
+
value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun et Crous 2003"
|
|
88
127
|
canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
|
|
89
128
|
{:orig_authors=>{:names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
|
|
90
129
|
|
|
91
130
|
parse("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should_not be_nil
|
|
92
|
-
value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
|
|
131
|
+
value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas et Sousa da Câmara 1934"
|
|
93
132
|
details("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == {:authors=>{:year=>"1934", :names=>["M.T. Lucas", "Sousa da C\303\242mara"]}, :species=>"polyspora", :genus=>"Stagonospora"}
|
|
94
133
|
|
|
95
134
|
parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
|
|
96
135
|
parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
|
|
97
|
-
value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981"
|
|
136
|
+
value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman et E.A. Herrm.) Van der Walt et Arx 1981"
|
|
98
137
|
parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
|
|
99
138
|
parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
|
|
100
139
|
parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
|
|
@@ -114,11 +153,12 @@ describe ScientificName do
|
|
|
114
153
|
parse("Peltula coriacea Büdel, Henssen & Wessels 1986").should_not be_nil
|
|
115
154
|
#had to add no dot rule for trinomials without a rank to make it to work
|
|
116
155
|
parse("Saccharomyces drosophilae anon.").should_not be_nil
|
|
156
|
+
details("Saccharomyces drosophilae anon.").should == {:genus=>"Saccharomyces", :species=>"drosophilae", :authors=>{:names=>["anon."]}}
|
|
117
157
|
end
|
|
118
158
|
|
|
119
159
|
it 'should parse several authors with several years' do
|
|
120
160
|
parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003").should_not be_nil
|
|
121
|
-
value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
|
|
161
|
+
value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun et Crous 2003"
|
|
122
162
|
canonical("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
|
|
123
163
|
details("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == {:orig_authors=>{:year=>"1883", :names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
|
|
124
164
|
end
|
|
@@ -150,7 +190,7 @@ describe ScientificName do
|
|
|
150
190
|
|
|
151
191
|
it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
|
|
152
192
|
parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
|
|
153
|
-
value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
|
193
|
+
value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall et D.E. Stuntz 1972"
|
|
154
194
|
details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:rank=>"var.", :value=>"zonatum"}, {:rank=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
|
|
155
195
|
end
|
|
156
196
|
|
|
@@ -185,6 +225,7 @@ describe ScientificName do
|
|
|
185
225
|
#invalid but happens
|
|
186
226
|
parse("Mycosphaerella eryngii (Fr. Duby) ex Oudem. 1897").should_not be_nil
|
|
187
227
|
parse("Mycosphaerella eryngii (Fr.ex Duby) ex Oudem. 1897").should_not be_nil
|
|
228
|
+
parse("Salmonella werahensis (Castellani) Hauduroy and Ehringer in Hauduroy 1937").should_not be_nil
|
|
188
229
|
end
|
|
189
230
|
|
|
190
231
|
it "should parse multiplication sign" do
|
|
@@ -222,9 +263,12 @@ describe ScientificName do
|
|
|
222
263
|
it "should parse name with subspecies without rank NOT BOTANICAL" do
|
|
223
264
|
name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
|
224
265
|
parse(name).should_not be_nil
|
|
225
|
-
value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
|
266
|
+
value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972"
|
|
226
267
|
canonical(name).should == "Hydnellum scrobiculatum zonatum"
|
|
227
268
|
details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:rank=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
|
|
269
|
+
sp = "Begonia pingbienensis angustior"
|
|
270
|
+
parse(sp).should_not be_nil
|
|
271
|
+
details(sp).should == {:genus=>"Begonia", :species=>"pingbienensis", :subspecies=>{:rank=>"n/a", :value=>"angustior"}}
|
|
228
272
|
end
|
|
229
273
|
|
|
230
274
|
it "should not parse utf-8 chars in name part" do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dimus-biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.10
|
|
4
|
+
version: 0.0.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Mozzherin
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date:
|
|
12
|
+
date: 2009-04-11 00:00:00 -07:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|