dimus-biodiversity 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
data/bin/nnparse CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'rubygems'
3
- gem 'dimus-biodiversity' rescue gem 'biodiversity'
3
+ gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
4
4
 
5
5
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
6
  require 'biodiversity'
@@ -37,11 +37,15 @@ IO.foreach(ARGV[0]) do |n|
37
37
  puts n
38
38
  count += 1
39
39
  else
40
- name_dict[:output] = parsed.value
41
- name_dict[:canononical] = parsed.canonical
42
- name_dict[:details] = parsed.details
43
- name_dict[:parsed => true]
44
- last_result = JSON.generate name_dict
40
+ begin
41
+ name_dict[:output] = parsed.value
42
+ name_dict[:canononical] = parsed.canonical
43
+ name_dict[:details] = parsed.details
44
+ name_dict[:parsed => true]
45
+ last_result = JSON.generate name_dict
46
+ rescue
47
+ puts 'PROBLEM: ' + n
48
+ end
45
49
  end
46
50
  end
47
51
  o.write(last_result + "\n") if last_result
@@ -741,6 +741,10 @@ module ScientificName
741
741
  elements[1]
742
742
  end
743
743
 
744
+ def ex_sep
745
+ elements[2]
746
+ end
747
+
744
748
  def space
745
749
  elements[3]
746
750
  end
@@ -821,13 +825,7 @@ module ScientificName
821
825
  r7 = _nt_space
822
826
  s5 << r7
823
827
  if r7
824
- if input.index("ex", index) == index
825
- r8 = instantiate_node(SyntaxNode,input, index...(index + 2))
826
- @index += 2
827
- else
828
- terminal_parse_failure("ex")
829
- r8 = nil
830
- end
828
+ r8 = _nt_ex_sep
831
829
  s5 << r8
832
830
  if r8
833
831
  r9 = _nt_space
@@ -984,9 +982,44 @@ module ScientificName
984
982
  elements[3]
985
983
  end
986
984
 
985
+ def space
986
+ elements[5]
987
+ end
988
+
989
+ def space
990
+ elements[7]
991
+ end
992
+
993
+ def b
994
+ elements[8]
995
+ end
987
996
  end
988
997
 
989
998
  module OriginalAuthorsNamesFull1
999
+ def value
1000
+ "(" + a.value + " " + b.value + ")"
1001
+ end
1002
+ def details
1003
+ {:orig_authors => a.details[:authors], :year => b.details[:year]}
1004
+ end
1005
+ end
1006
+
1007
+ module OriginalAuthorsNamesFull2
1008
+ def space
1009
+ elements[1]
1010
+ end
1011
+
1012
+ def a
1013
+ elements[2]
1014
+ end
1015
+
1016
+ def space
1017
+ elements[3]
1018
+ end
1019
+
1020
+ end
1021
+
1022
+ module OriginalAuthorsNamesFull3
990
1023
  def value
991
1024
  "(" + a.value + ")"
992
1025
  end
@@ -1003,44 +1036,120 @@ module ScientificName
1003
1036
  return cached
1004
1037
  end
1005
1038
 
1006
- i0, s0 = index, []
1039
+ i0 = index
1040
+ i1, s1 = index, []
1007
1041
  if input.index("(", index) == index
1008
- r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
1042
+ r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
1009
1043
  @index += 1
1010
1044
  else
1011
1045
  terminal_parse_failure("(")
1012
- r1 = nil
1046
+ r2 = nil
1013
1047
  end
1014
- s0 << r1
1015
- if r1
1016
- r2 = _nt_space
1017
- s0 << r2
1018
- if r2
1019
- r3 = _nt_authors_names_full
1020
- s0 << r3
1021
- if r3
1022
- r4 = _nt_space
1023
- s0 << r4
1024
- if r4
1048
+ s1 << r2
1049
+ if r2
1050
+ r3 = _nt_space
1051
+ s1 << r3
1052
+ if r3
1053
+ r4 = _nt_authors_names
1054
+ s1 << r4
1055
+ if r4
1056
+ r5 = _nt_space
1057
+ s1 << r5
1058
+ if r5
1025
1059
  if input.index(")", index) == index
1026
- r5 = instantiate_node(SyntaxNode,input, index...(index + 1))
1060
+ r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
1027
1061
  @index += 1
1028
1062
  else
1029
1063
  terminal_parse_failure(")")
1030
- r5 = nil
1064
+ r6 = nil
1065
+ end
1066
+ s1 << r6
1067
+ if r6
1068
+ r7 = _nt_space
1069
+ s1 << r7
1070
+ if r7
1071
+ if input.index(Regexp.new('[,]'), index) == index
1072
+ r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
1073
+ @index += 1
1074
+ else
1075
+ r9 = nil
1076
+ end
1077
+ if r9
1078
+ r8 = r9
1079
+ else
1080
+ r8 = instantiate_node(SyntaxNode,input, index...index)
1081
+ end
1082
+ s1 << r8
1083
+ if r8
1084
+ r10 = _nt_space
1085
+ s1 << r10
1086
+ if r10
1087
+ r11 = _nt_year
1088
+ s1 << r11
1089
+ end
1090
+ end
1091
+ end
1031
1092
  end
1032
- s0 << r5
1033
1093
  end
1034
1094
  end
1035
1095
  end
1036
1096
  end
1037
- if s0.last
1038
- r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
1039
- r0.extend(OriginalAuthorsNamesFull0)
1040
- r0.extend(OriginalAuthorsNamesFull1)
1097
+ if s1.last
1098
+ r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
1099
+ r1.extend(OriginalAuthorsNamesFull0)
1100
+ r1.extend(OriginalAuthorsNamesFull1)
1041
1101
  else
1042
- self.index = i0
1043
- r0 = nil
1102
+ self.index = i1
1103
+ r1 = nil
1104
+ end
1105
+ if r1
1106
+ r0 = r1
1107
+ else
1108
+ i12, s12 = index, []
1109
+ if input.index("(", index) == index
1110
+ r13 = instantiate_node(SyntaxNode,input, index...(index + 1))
1111
+ @index += 1
1112
+ else
1113
+ terminal_parse_failure("(")
1114
+ r13 = nil
1115
+ end
1116
+ s12 << r13
1117
+ if r13
1118
+ r14 = _nt_space
1119
+ s12 << r14
1120
+ if r14
1121
+ r15 = _nt_authors_names_full
1122
+ s12 << r15
1123
+ if r15
1124
+ r16 = _nt_space
1125
+ s12 << r16
1126
+ if r16
1127
+ if input.index(")", index) == index
1128
+ r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
1129
+ @index += 1
1130
+ else
1131
+ terminal_parse_failure(")")
1132
+ r17 = nil
1133
+ end
1134
+ s12 << r17
1135
+ end
1136
+ end
1137
+ end
1138
+ end
1139
+ if s12.last
1140
+ r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
1141
+ r12.extend(OriginalAuthorsNamesFull2)
1142
+ r12.extend(OriginalAuthorsNamesFull3)
1143
+ else
1144
+ self.index = i12
1145
+ r12 = nil
1146
+ end
1147
+ if r12
1148
+ r0 = r12
1149
+ else
1150
+ self.index = i0
1151
+ r0 = nil
1152
+ end
1044
1153
  end
1045
1154
 
1046
1155
  node_cache[:original_authors_names_full][start_index] = r0
@@ -1135,6 +1244,10 @@ module ScientificName
1135
1244
  elements[1]
1136
1245
  end
1137
1246
 
1247
+ def ex_sep
1248
+ elements[2]
1249
+ end
1250
+
1138
1251
  def space
1139
1252
  elements[3]
1140
1253
  end
@@ -1168,13 +1281,7 @@ module ScientificName
1168
1281
  r2 = _nt_space
1169
1282
  s0 << r2
1170
1283
  if r2
1171
- if input.index("ex", index) == index
1172
- r3 = instantiate_node(SyntaxNode,input, index...(index + 2))
1173
- @index += 2
1174
- else
1175
- terminal_parse_failure("ex")
1176
- r3 = nil
1177
- end
1284
+ r3 = _nt_ex_sep
1178
1285
  s0 << r3
1179
1286
  if r3
1180
1287
  r4 = _nt_space
@@ -1290,6 +1397,45 @@ module ScientificName
1290
1397
  return r0
1291
1398
  end
1292
1399
 
1400
+ def _nt_ex_sep
1401
+ start_index = index
1402
+ if node_cache[:ex_sep].has_key?(index)
1403
+ cached = node_cache[:ex_sep][index]
1404
+ @index = cached.interval.end if cached
1405
+ return cached
1406
+ end
1407
+
1408
+ i0 = index
1409
+ if input.index("ex", index) == index
1410
+ r1 = instantiate_node(SyntaxNode,input, index...(index + 2))
1411
+ @index += 2
1412
+ else
1413
+ terminal_parse_failure("ex")
1414
+ r1 = nil
1415
+ end
1416
+ if r1
1417
+ r0 = r1
1418
+ else
1419
+ if input.index("in", index) == index
1420
+ r2 = instantiate_node(SyntaxNode,input, index...(index + 2))
1421
+ @index += 2
1422
+ else
1423
+ terminal_parse_failure("in")
1424
+ r2 = nil
1425
+ end
1426
+ if r2
1427
+ r0 = r2
1428
+ else
1429
+ self.index = i0
1430
+ r0 = nil
1431
+ end
1432
+ end
1433
+
1434
+ node_cache[:ex_sep][start_index] = r0
1435
+
1436
+ return r0
1437
+ end
1438
+
1293
1439
  module AuthorsNames0
1294
1440
  def a
1295
1441
  elements[0]
@@ -1378,7 +1524,7 @@ module ScientificName
1378
1524
  module AuthorNameSeparator0
1379
1525
  def apply(a,b)
1380
1526
  sep = text_value.strip
1381
- sep = " " + sep if sep == "&"
1527
+ sep = " et" if ["&","and","et"].include? sep
1382
1528
  a.value + sep + " " + b.value
1383
1529
  end
1384
1530
 
@@ -1418,8 +1564,32 @@ module ScientificName
1418
1564
  r0 = r2
1419
1565
  r0.extend(AuthorNameSeparator0)
1420
1566
  else
1421
- self.index = i0
1422
- r0 = nil
1567
+ if input.index("and", index) == index
1568
+ r3 = instantiate_node(SyntaxNode,input, index...(index + 3))
1569
+ @index += 3
1570
+ else
1571
+ terminal_parse_failure("and")
1572
+ r3 = nil
1573
+ end
1574
+ if r3
1575
+ r0 = r3
1576
+ r0.extend(AuthorNameSeparator0)
1577
+ else
1578
+ if input.index("et", index) == index
1579
+ r4 = instantiate_node(SyntaxNode,input, index...(index + 2))
1580
+ @index += 2
1581
+ else
1582
+ terminal_parse_failure("et")
1583
+ r4 = nil
1584
+ end
1585
+ if r4
1586
+ r0 = r4
1587
+ r0.extend(AuthorNameSeparator0)
1588
+ else
1589
+ self.index = i0
1590
+ r0 = nil
1591
+ end
1592
+ end
1423
1593
  end
1424
1594
  end
1425
1595
 
@@ -2104,17 +2274,17 @@ module ScientificName
2104
2274
  s13 << r17
2105
2275
  if r17
2106
2276
  i18 = index
2107
- if input.index(Regexp.new('[^\\.]'), index) == index
2277
+ if input.index(Regexp.new('[\\.]'), index) == index
2108
2278
  r19 = instantiate_node(SyntaxNode,input, index...(index + 1))
2109
2279
  @index += 1
2110
2280
  else
2111
2281
  r19 = nil
2112
2282
  end
2113
2283
  if r19
2284
+ r18 = nil
2285
+ else
2114
2286
  self.index = i18
2115
2287
  r18 = instantiate_node(SyntaxNode,input, index...index)
2116
- else
2117
- r18 = nil
2118
2288
  end
2119
2289
  s13 << r18
2120
2290
  end
@@ -2392,6 +2562,9 @@ module ScientificName
2392
2562
  def value
2393
2563
  a.value + b.value
2394
2564
  end
2565
+ def details
2566
+ {:editorial_markup => value, :is_valid => false}
2567
+ end
2395
2568
  end
2396
2569
 
2397
2570
  def _nt_editorials
@@ -3277,25 +3450,40 @@ module ScientificName
3277
3450
  return r0
3278
3451
  end
3279
3452
 
3280
- module LatinWord0
3453
+ module CapLatinWord0
3454
+ def a
3455
+ elements[0]
3456
+ end
3457
+
3458
+ def b
3459
+ elements[1]
3460
+ end
3281
3461
  end
3282
3462
 
3283
- module LatinWord1
3284
- def value
3285
- text_value.strip
3463
+ module CapLatinWord1
3464
+ def value
3465
+ a.text_value + b.value
3466
+ end
3467
+
3468
+ def canonical
3469
+ value
3470
+ end
3471
+
3472
+ def details
3473
+ {:uninomial => value}
3286
3474
  end
3287
3475
  end
3288
3476
 
3289
- def _nt_latin_word
3477
+ def _nt_cap_latin_word
3290
3478
  start_index = index
3291
- if node_cache[:latin_word].has_key?(index)
3292
- cached = node_cache[:latin_word][index]
3479
+ if node_cache[:cap_latin_word].has_key?(index)
3480
+ cached = node_cache[:cap_latin_word][index]
3293
3481
  @index = cached.interval.end if cached
3294
3482
  return cached
3295
3483
  end
3296
3484
 
3297
3485
  i0, s0 = index, []
3298
- if input.index(Regexp.new('[a-]'), index) == index
3486
+ if input.index(Regexp.new('[A-Z]'), index) == index
3299
3487
  r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
3300
3488
  @index += 1
3301
3489
  else
@@ -3303,117 +3491,303 @@ module ScientificName
3303
3491
  end
3304
3492
  s0 << r1
3305
3493
  if r1
3306
- s2, i2 = [], index
3307
- loop do
3308
- if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
3309
- r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
3310
- @index += 1
3311
- else
3312
- r3 = nil
3313
- end
3314
- if r3
3315
- s2 << r3
3316
- else
3317
- break
3318
- end
3319
- end
3320
- if s2.empty?
3321
- self.index = i2
3322
- r2 = nil
3323
- else
3324
- r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
3325
- end
3494
+ r2 = _nt_latin_word
3326
3495
  s0 << r2
3327
3496
  end
3328
3497
  if s0.last
3329
3498
  r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
3330
- r0.extend(LatinWord0)
3331
- r0.extend(LatinWord1)
3499
+ r0.extend(CapLatinWord0)
3500
+ r0.extend(CapLatinWord1)
3332
3501
  else
3333
3502
  self.index = i0
3334
3503
  r0 = nil
3335
3504
  end
3336
3505
 
3337
- node_cache[:latin_word][start_index] = r0
3506
+ node_cache[:cap_latin_word][start_index] = r0
3338
3507
 
3339
3508
  return r0
3340
3509
  end
3341
3510
 
3342
- module CapLatinWord0
3511
+ module LatinWord0
3512
+ def a
3513
+ elements[0]
3514
+ end
3515
+
3516
+ def b
3517
+ elements[1]
3518
+ end
3343
3519
  end
3344
3520
 
3345
- module CapLatinWord1
3521
+ module LatinWord1
3346
3522
  def value
3347
- text_value.strip
3348
- end
3349
-
3350
- def canonical
3351
- text_value.strip
3352
- end
3353
-
3354
- def details
3355
- {:uninomial => value}
3523
+ a.text_value + b.value
3356
3524
  end
3357
3525
  end
3358
3526
 
3359
- def _nt_cap_latin_word
3360
- start_index = index
3361
- if node_cache[:cap_latin_word].has_key?(index)
3362
- cached = node_cache[:cap_latin_word][index]
3363
- @index = cached.interval.end if cached
3364
- return cached
3527
+ module LatinWord2
3528
+ def a
3529
+ elements[0]
3365
3530
  end
3366
3531
 
3367
- i0, s0 = index, []
3368
- if input.index(Regexp.new('[A-Z]'), index) == index
3369
- r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
3532
+ def b
3533
+ elements[1]
3534
+ end
3535
+ end
3536
+
3537
+ module LatinWord3
3538
+ def value
3539
+ a.value + b.value
3540
+ end
3541
+ end
3542
+
3543
+ def _nt_latin_word
3544
+ start_index = index
3545
+ if node_cache[:latin_word].has_key?(index)
3546
+ cached = node_cache[:latin_word][index]
3547
+ @index = cached.interval.end if cached
3548
+ return cached
3549
+ end
3550
+
3551
+ i0 = index
3552
+ i1, s1 = index, []
3553
+ if input.index(Regexp.new('[a-zë]'), index) == index
3554
+ r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
3370
3555
  @index += 1
3371
3556
  else
3557
+ r2 = nil
3558
+ end
3559
+ s1 << r2
3560
+ if r2
3561
+ r3 = _nt_full_name_letters
3562
+ s1 << r3
3563
+ end
3564
+ if s1.last
3565
+ r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
3566
+ r1.extend(LatinWord0)
3567
+ r1.extend(LatinWord1)
3568
+ else
3569
+ self.index = i1
3372
3570
  r1 = nil
3373
3571
  end
3374
- s0 << r1
3375
3572
  if r1
3376
- if input.index(Regexp.new('[a-zë]'), index) == index
3377
- r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
3378
- @index += 1
3573
+ r0 = r1
3574
+ else
3575
+ i4, s4 = index, []
3576
+ r5 = _nt_digraph
3577
+ s4 << r5
3578
+ if r5
3579
+ r6 = _nt_full_name_letters
3580
+ s4 << r6
3581
+ end
3582
+ if s4.last
3583
+ r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
3584
+ r4.extend(LatinWord2)
3585
+ r4.extend(LatinWord3)
3379
3586
  else
3380
- r2 = nil
3587
+ self.index = i4
3588
+ r4 = nil
3381
3589
  end
3382
- s0 << r2
3383
- if r2
3384
- s3, i3 = [], index
3385
- loop do
3386
- if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
3387
- r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
3388
- @index += 1
3389
- else
3390
- r4 = nil
3391
- end
3392
- if r4
3393
- s3 << r4
3394
- else
3395
- break
3396
- end
3590
+ if r4
3591
+ r0 = r4
3592
+ else
3593
+ self.index = i0
3594
+ r0 = nil
3595
+ end
3596
+ end
3597
+
3598
+ node_cache[:latin_word][start_index] = r0
3599
+
3600
+ return r0
3601
+ end
3602
+
3603
+ module FullNameLetters0
3604
+ def a
3605
+ elements[0]
3606
+ end
3607
+
3608
+ def b
3609
+ elements[1]
3610
+ end
3611
+ end
3612
+
3613
+ module FullNameLetters1
3614
+ def value
3615
+ a.value + b.value
3616
+ end
3617
+ end
3618
+
3619
+ module FullNameLetters2
3620
+ def a
3621
+ elements[0]
3622
+ end
3623
+
3624
+ def b
3625
+ elements[1]
3626
+ end
3627
+
3628
+ def c
3629
+ elements[2]
3630
+ end
3631
+ end
3632
+
3633
+ module FullNameLetters3
3634
+ def value
3635
+ a.value + b.value + c.value
3636
+ end
3637
+ end
3638
+
3639
+ def _nt_full_name_letters
3640
+ start_index = index
3641
+ if node_cache[:full_name_letters].has_key?(index)
3642
+ cached = node_cache[:full_name_letters][index]
3643
+ @index = cached.interval.end if cached
3644
+ return cached
3645
+ end
3646
+
3647
+ i0 = index
3648
+ i1, s1 = index, []
3649
+ r2 = _nt_digraph
3650
+ s1 << r2
3651
+ if r2
3652
+ r3 = _nt_full_name_letters
3653
+ s1 << r3
3654
+ end
3655
+ if s1.last
3656
+ r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
3657
+ r1.extend(FullNameLetters0)
3658
+ r1.extend(FullNameLetters1)
3659
+ else
3660
+ self.index = i1
3661
+ r1 = nil
3662
+ end
3663
+ if r1
3664
+ r0 = r1
3665
+ else
3666
+ i4, s4 = index, []
3667
+ r5 = _nt_valid_name_letters
3668
+ s4 << r5
3669
+ if r5
3670
+ r6 = _nt_digraph
3671
+ s4 << r6
3672
+ if r6
3673
+ r7 = _nt_full_name_letters
3674
+ s4 << r7
3397
3675
  end
3398
- if s3.empty?
3399
- self.index = i3
3400
- r3 = nil
3676
+ end
3677
+ if s4.last
3678
+ r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
3679
+ r4.extend(FullNameLetters2)
3680
+ r4.extend(FullNameLetters3)
3681
+ else
3682
+ self.index = i4
3683
+ r4 = nil
3684
+ end
3685
+ if r4
3686
+ r0 = r4
3687
+ else
3688
+ r8 = _nt_valid_name_letters
3689
+ if r8
3690
+ r0 = r8
3401
3691
  else
3402
- r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
3692
+ self.index = i0
3693
+ r0 = nil
3403
3694
  end
3404
- s0 << r3
3405
3695
  end
3406
3696
  end
3407
- if s0.last
3408
- r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
3409
- r0.extend(CapLatinWord0)
3410
- r0.extend(CapLatinWord1)
3411
- else
3697
+
3698
+ node_cache[:full_name_letters][start_index] = r0
3699
+
3700
+ return r0
3701
+ end
3702
+
3703
+ module ValidNameLetters0
3704
+ def value
3705
+ text_value
3706
+ end
3707
+ end
3708
+
3709
+ def _nt_valid_name_letters
3710
+ start_index = index
3711
+ if node_cache[:valid_name_letters].has_key?(index)
3712
+ cached = node_cache[:valid_name_letters][index]
3713
+ @index = cached.interval.end if cached
3714
+ return cached
3715
+ end
3716
+
3717
+ s0, i0 = [], index
3718
+ loop do
3719
+ if input.index(Regexp.new('[a-z\\-ëüäöï]'), index) == index
3720
+ r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
3721
+ @index += 1
3722
+ else
3723
+ r1 = nil
3724
+ end
3725
+ if r1
3726
+ s0 << r1
3727
+ else
3728
+ break
3729
+ end
3730
+ end
3731
+ if s0.empty?
3412
3732
  self.index = i0
3413
3733
  r0 = nil
3734
+ else
3735
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
3736
+ r0.extend(ValidNameLetters0)
3414
3737
  end
3415
3738
 
3416
- node_cache[:cap_latin_word][start_index] = r0
3739
+ node_cache[:valid_name_letters][start_index] = r0
3740
+
3741
+ return r0
3742
+ end
3743
+
3744
+ module Digraph0
3745
+ def value
3746
+ 'ae'
3747
+ end
3748
+ end
3749
+
3750
+ module Digraph1
3751
+ def value
3752
+ 'oe'
3753
+ end
3754
+ end
3755
+
3756
+ def _nt_digraph
3757
+ start_index = index
3758
+ if node_cache[:digraph].has_key?(index)
3759
+ cached = node_cache[:digraph][index]
3760
+ @index = cached.interval.end if cached
3761
+ return cached
3762
+ end
3763
+
3764
+ i0 = index
3765
+ if input.index(Regexp.new('[æ]'), index) == index
3766
+ r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
3767
+ r1.extend(Digraph0)
3768
+ @index += 1
3769
+ else
3770
+ r1 = nil
3771
+ end
3772
+ if r1
3773
+ r0 = r1
3774
+ else
3775
+ if input.index(Regexp.new('[œ]'), index) == index
3776
+ r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
3777
+ r2.extend(Digraph1)
3778
+ @index += 1
3779
+ else
3780
+ r2 = nil
3781
+ end
3782
+ if r2
3783
+ r0 = r2
3784
+ else
3785
+ self.index = i0
3786
+ r0 = nil
3787
+ end
3788
+ end
3789
+
3790
+ node_cache[:digraph][start_index] = r0
3417
3791
 
3418
3792
  return r0
3419
3793
  end
@@ -3494,29 +3868,110 @@ module ScientificName
3494
3868
  return cached
3495
3869
  end
3496
3870
 
3497
- s0, i0 = [], index
3871
+ i0 = index
3872
+ r1 = _nt_year_with_character
3873
+ if r1
3874
+ r0 = r1
3875
+ else
3876
+ s2, i2 = [], index
3877
+ loop do
3878
+ if input.index(Regexp.new('[0-9\\?]'), index) == index
3879
+ r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
3880
+ @index += 1
3881
+ else
3882
+ r3 = nil
3883
+ end
3884
+ if r3
3885
+ s2 << r3
3886
+ else
3887
+ break
3888
+ end
3889
+ end
3890
+ if s2.empty?
3891
+ self.index = i2
3892
+ r2 = nil
3893
+ else
3894
+ r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
3895
+ r2.extend(Year0)
3896
+ end
3897
+ if r2
3898
+ r0 = r2
3899
+ else
3900
+ self.index = i0
3901
+ r0 = nil
3902
+ end
3903
+ end
3904
+
3905
+ node_cache[:year][start_index] = r0
3906
+
3907
+ return r0
3908
+ end
3909
+
3910
+ module YearWithCharacter0
3911
+ def a
3912
+ elements[0]
3913
+ end
3914
+
3915
+ end
3916
+
3917
+ module YearWithCharacter1
3918
+ def value
3919
+ a.text_value
3920
+ end
3921
+ def details
3922
+ {:year => value}
3923
+ end
3924
+ end
3925
+
3926
+ def _nt_year_with_character
3927
+ start_index = index
3928
+ if node_cache[:year_with_character].has_key?(index)
3929
+ cached = node_cache[:year_with_character][index]
3930
+ @index = cached.interval.end if cached
3931
+ return cached
3932
+ end
3933
+
3934
+ i0, s0 = index, []
3935
+ s1, i1 = [], index
3498
3936
  loop do
3499
3937
  if input.index(Regexp.new('[0-9\\?]'), index) == index
3500
- r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
3938
+ r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
3501
3939
  @index += 1
3502
3940
  else
3503
- r1 = nil
3941
+ r2 = nil
3504
3942
  end
3505
- if r1
3506
- s0 << r1
3943
+ if r2
3944
+ s1 << r2
3507
3945
  else
3508
3946
  break
3509
3947
  end
3510
3948
  end
3511
- if s0.empty?
3512
- self.index = i0
3513
- r0 = nil
3949
+ if s1.empty?
3950
+ self.index = i1
3951
+ r1 = nil
3514
3952
  else
3953
+ r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
3954
+ end
3955
+ s0 << r1
3956
+ if r1
3957
+ if input.index(Regexp.new('[a-zA-Z]'), index) == index
3958
+ r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
3959
+ @index += 1
3960
+ else
3961
+ r3 = nil
3962
+ end
3963
+ s0 << r3
3964
+ end
3965
+ if s0.last
3515
3966
  r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
3516
- r0.extend(Year0)
3967
+ r0.extend(YearWithCharacter0)
3968
+ r0.extend(YearWithCharacter1)
3969
+ else
3970
+ self.index = i0
3971
+ r0 = nil
3517
3972
  end
3518
3973
 
3519
- node_cache[:year][start_index] = r0
3974
+ node_cache[:year_with_character][start_index] = r0
3520
3975
 
3521
3976
  return r0
3522
3977
  end
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  grammar ScientificName
2
3
 
3
4
  rule composite_scientific_name
@@ -27,7 +28,7 @@ grammar ScientificName
27
28
  end
28
29
  }
29
30
  /
30
- scientific_name
31
+ scientific_name
31
32
  end
32
33
 
33
34
  rule scientific_name
@@ -137,7 +138,7 @@ grammar ScientificName
137
138
  end
138
139
  }
139
140
  /
140
- a:simple_authors_part space "ex" space b:simple_authors_part {
141
+ a:simple_authors_part space ex_sep space b:simple_authors_part {
141
142
  def value
142
143
  a.value + " ex " + b.value
143
144
  end
@@ -179,6 +180,15 @@ grammar ScientificName
179
180
  end
180
181
 
181
182
  rule original_authors_names_full
183
+ "(" space a:authors_names space ")" space [,]? space b:year {
184
+ def value
185
+ "(" + a.value + " " + b.value + ")"
186
+ end
187
+ def details
188
+ {:orig_authors => a.details[:authors], :year => b.details[:year]}
189
+ end
190
+ }
191
+ /
182
192
  "(" space a:authors_names_full space ")" {
183
193
  def value
184
194
  "(" + a.value + ")"
@@ -202,7 +212,7 @@ grammar ScientificName
202
212
  end
203
213
 
204
214
  rule authors_revised_name
205
- a:authors_names_full space "ex" space b:authors_names_full {
215
+ a:authors_names_full space ex_sep space b:authors_names_full {
206
216
  def value
207
217
  a.value + " ex " + b.value
208
218
  end
@@ -224,6 +234,10 @@ grammar ScientificName
224
234
  /
225
235
  authors_names
226
236
  end
237
+
238
+ rule ex_sep
239
+ ("ex"/"in")
240
+ end
227
241
 
228
242
  rule authors_names
229
243
  a:author_name space sep:author_name_separator space b:authors_names {
@@ -240,10 +254,10 @@ grammar ScientificName
240
254
  end
241
255
 
242
256
  rule author_name_separator
243
- ("&"/",") {
257
+ ("&"/","/"and"/"et") {
244
258
  def apply(a,b)
245
259
  sep = text_value.strip
246
- sep = " " + sep if sep == "&"
260
+ sep = " et" if ["&","and","et"].include? sep
247
261
  a.value + sep + " " + b.value
248
262
  end
249
263
 
@@ -315,7 +329,7 @@ grammar ScientificName
315
329
  end
316
330
  }
317
331
  /
318
- space a:species_name space b:latin_word &[^\.] {
332
+ space a:species_name space b:latin_word ![\.] {
319
333
  def value
320
334
  a.value + " " + b.value
321
335
  end
@@ -385,6 +399,9 @@ grammar ScientificName
385
399
  space a:rank space [&]? space b:editorials {
386
400
  def value
387
401
  a.value + b.value
402
+ end
403
+ def details
404
+ {:editorial_markup => value, :is_valid => false}
388
405
  end
389
406
  }
390
407
  /
@@ -483,23 +500,15 @@ grammar ScientificName
483
500
  end
484
501
  }
485
502
  end
486
-
487
- rule latin_word
488
- [a-zë] [a-z\-ëüäöï]+ {
489
- def value
490
- text_value.strip
491
- end
492
- }
493
- end
494
503
 
495
504
  rule cap_latin_word
496
- [A-Z] [a-zë] [a-z\-ëüäöï]+ {
505
+ a:[A-Z] b:latin_word {
497
506
  def value
498
- text_value.strip
507
+ a.text_value + b.value
499
508
  end
500
509
 
501
510
  def canonical
502
- text_value.strip
511
+ value
503
512
  end
504
513
 
505
514
  def details
@@ -507,7 +516,59 @@ grammar ScientificName
507
516
  end
508
517
  }
509
518
  end
519
+
520
+ rule latin_word
521
+ a:[a-zë] b:full_name_letters {
522
+ def value
523
+ a.text_value + b.value
524
+ end
525
+ }
526
+ /
527
+ a:digraph b:full_name_letters {
528
+ def value
529
+ a.value + b.value
530
+ end
531
+ }
532
+ end
510
533
 
534
+ rule full_name_letters
535
+ a:digraph b:full_name_letters {
536
+ def value
537
+ a.value + b.value
538
+ end
539
+ }
540
+ /
541
+ a:valid_name_letters b:digraph c:full_name_letters {
542
+ def value
543
+ a.value + b.value + c.value
544
+ end
545
+ }
546
+ /
547
+ valid_name_letters
548
+ end
549
+
550
+ rule valid_name_letters
551
+ [a-z\-ëüäöï]+ {
552
+ def value
553
+ text_value
554
+ end
555
+ }
556
+ end
557
+
558
+ rule digraph
559
+ [æ] {
560
+ def value
561
+ 'ae'
562
+ end
563
+ }
564
+ /
565
+ [œ] {
566
+ def value
567
+ 'oe'
568
+ end
569
+ }
570
+ end
571
+
511
572
  rule hybrid_separator
512
573
  ("x"/"X"/"×") {
513
574
  def value
@@ -517,6 +578,8 @@ grammar ScientificName
517
578
  end
518
579
 
519
580
  rule year
581
+ year_with_character
582
+ /
520
583
  [0-9\?]+ {
521
584
  def value
522
585
  text_value.strip
@@ -526,7 +589,18 @@ grammar ScientificName
526
589
  end
527
590
  }
528
591
  end
529
-
592
+
593
+ rule year_with_character
594
+ a:[0-9\?]+ [a-zA-Z] {
595
+ def value
596
+ a.text_value
597
+ end
598
+ def details
599
+ {:year => value}
600
+ end
601
+ }
602
+ end
603
+
530
604
  rule space
531
605
  [\s]*
532
606
  end
@@ -51,6 +51,31 @@ describe ScientificName do
51
51
  details(sn).should == {:subgenus=>"Amerigo", :authors=>{:year=>"1999", :names=>["Author"]}, :species=>"pealeii", :genus=>"Doriteuthis"}
52
52
  end
53
53
 
54
+ it 'should parse æ in the name' do
55
+ names = [
56
+ ["Læptura laetifica Dow, 1913", "Laeptura laetifica Dow 1913"],
57
+ ["Leptura lætifica Dow, 1913", "Leptura laetifica Dow 1913"],
58
+ ["Leptura leætifica Dow, 1913", "Leptura leaetifica Dow 1913"],
59
+ ["Leæptura laetifica Dow, 1913", "Leaeptura laetifica Dow 1913"],
60
+ ["Leœptura laetifica Dow, 1913", "Leoeptura laetifica Dow 1913"]
61
+ ]
62
+ names.each do |name_pair|
63
+ parse(name_pair[0]).should_not be_nil
64
+ value(name_pair[0]).should == name_pair[1]
65
+ end
66
+ end
67
+
68
+ it 'should parse year' do
69
+ sn = "Platypus bicaudatulus Schedl 1935"
70
+ parse(sn).should_not be_nil
71
+ value(sn).should == "Platypus bicaudatulus Schedl 1935"
72
+ sn = "Platypus bicaudatulus Schedl, 1935h"
73
+ parse(sn).should_not be_nil
74
+ value(sn).should == "Platypus bicaudatulus Schedl 1935"
75
+ details(sn).should == {:genus=>"Platypus", :species=>"bicaudatulus", :authors=>{:names=>["Schedl"], :year=>"1935"}}
76
+ parse("Platypus bicaudatulus Schedl, 1935B").should_not be_nil
77
+ end
78
+
54
79
  it 'should parse species autonym for complex subspecies authorships' do
55
80
  parse("Aus bus Linn. var. bus").should_not be_nil
56
81
  details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
@@ -61,18 +86,24 @@ describe ScientificName do
61
86
  it 'should parse several authors' do
62
87
  sn = "Pseudocercospora dendrobii U. Braun & Crous"
63
88
  parse(sn).should_not be_nil
64
- value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
89
+ value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
65
90
  canonical(sn).should == "Pseudocercospora dendrobii"
66
91
  details(sn).should == {
67
92
  :authors=>{:names=>["U. Braun","Crous"]},
68
93
  :species=>"dendrobii",
69
94
  :genus=>"Pseudocercospora"}
95
+ sn = "Pseudocercospora dendrobii U. Braun and Crous"
96
+ parse(sn).should_not be_nil
97
+ value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
98
+ sn = "Pseudocercospora dendrobii U. Braun et Crous"
99
+ parse(sn).should_not be_nil
100
+ value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous"
70
101
  end
71
102
 
72
103
  it 'should parse several authors with a year' do
73
104
  sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
74
105
  parse(sn).should_not be_nil
75
- value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous 2003"
106
+ value(sn).should == "Pseudocercospora dendrobii U. Braun et Crous 2003"
76
107
  canonical(sn).should == "Pseudocercospora dendrobii"
77
108
  details(sn).should == {
78
109
  :authors=>{:names=>["U. Braun","Crous"], :year => "2003"},
@@ -80,21 +111,29 @@ describe ScientificName do
80
111
  :genus=>"Pseudocercospora"}
81
112
  sn = "Pseudocercospora dendrobii Crous, 2003"
82
113
  parse(sn).should_not be_nil
114
+ parse("Zophosis persis (Chatanay, 1914)").should_not be_nil
115
+ parse("Zophosis persis (Chatanay 1914)").should_not be_nil
116
+ parse("Zophosis persis (Chatanay), 1914").should_not be_nil
117
+ value("Zophosis persis (Chatanay), 1914").should == "Zophosis persis (Chatanay 1914)"
118
+ details("Zophosis persis (Chatanay), 1914").should == {:genus=>"Zophosis", :species=>"persis", :orig_authors=>{:names=>["Chatanay"]}, :year=>"1914"}
119
+
120
+ parse("Zophosis persis (Chatanay) 1914").should_not be_nil
121
+ #parse("Zophosis persis Chatanay (1914)").should_not be_nil
83
122
  end
84
123
 
85
124
  it 'should parse scientific name' do
86
125
  parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
87
- value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003"
126
+ value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun et Crous 2003"
88
127
  canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
89
128
  {:orig_authors=>{:names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
90
129
 
91
130
  parse("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should_not be_nil
92
- value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
131
+ value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas et Sousa da Câmara 1934"
93
132
  details("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == {:authors=>{:year=>"1934", :names=>["M.T. Lucas", "Sousa da C\303\242mara"]}, :species=>"polyspora", :genus=>"Stagonospora"}
94
133
 
95
134
  parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
96
135
  parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
97
- value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981"
136
+ value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman et E.A. Herrm.) Van der Walt et Arx 1981"
98
137
  parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
99
138
  parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
100
139
  parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
@@ -114,11 +153,12 @@ describe ScientificName do
114
153
  parse("Peltula coriacea Büdel, Henssen & Wessels 1986").should_not be_nil
115
154
  #had to add no dot rule for trinomials without a rank to make it to work
116
155
  parse("Saccharomyces drosophilae anon.").should_not be_nil
156
+ details("Saccharomyces drosophilae anon.").should == {:genus=>"Saccharomyces", :species=>"drosophilae", :authors=>{:names=>["anon."]}}
117
157
  end
118
158
 
119
159
  it 'should parse several authors with several years' do
120
160
  parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003").should_not be_nil
121
- value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
161
+ value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun et Crous 2003"
122
162
  canonical("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
123
163
  details("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == {:orig_authors=>{:year=>"1883", :names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
124
164
  end
@@ -150,7 +190,7 @@ describe ScientificName do
150
190
 
151
191
  it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
152
192
  parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
153
- value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
193
+ value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall et D.E. Stuntz 1972"
154
194
  details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:rank=>"var.", :value=>"zonatum"}, {:rank=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
155
195
  end
156
196
 
@@ -185,6 +225,7 @@ describe ScientificName do
185
225
  #invalid but happens
186
226
  parse("Mycosphaerella eryngii (Fr. Duby) ex Oudem. 1897").should_not be_nil
187
227
  parse("Mycosphaerella eryngii (Fr.ex Duby) ex Oudem. 1897").should_not be_nil
228
+ parse("Salmonella werahensis (Castellani) Hauduroy and Ehringer in Hauduroy 1937").should_not be_nil
188
229
  end
189
230
 
190
231
  it "should parse multiplication sign" do
@@ -222,9 +263,12 @@ describe ScientificName do
222
263
  it "should parse name with subspecies without rank NOT BOTANICAL" do
223
264
  name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
224
265
  parse(name).should_not be_nil
225
- value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
266
+ value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972"
226
267
  canonical(name).should == "Hydnellum scrobiculatum zonatum"
227
268
  details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:rank=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
269
+ sp = "Begonia pingbienensis angustior"
270
+ parse(sp).should_not be_nil
271
+ details(sp).should == {:genus=>"Begonia", :species=>"pingbienensis", :subspecies=>{:rank=>"n/a", :value=>"angustior"}}
228
272
  end
229
273
 
230
274
  it "should not parse utf-8 chars in name part" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-10-21 00:00:00 -07:00
12
+ date: 2009-04-11 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency