biodiversity 0.5.15 → 0.5.16

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,10 @@ grammar ScientificNameClean
22
22
  def details
23
23
  a.details.class == Array ? a.details : [a.details]
24
24
  end
25
+
26
+ def parser_run
27
+ 1
28
+ end
25
29
  }
26
30
  end
27
31
 
@@ -509,7 +513,7 @@ grammar ScientificNameClean
509
513
  end
510
514
 
511
515
  rule genus
512
- a:cap_latin_word !(space_hard author_prefix_word space_hard author_word) {
516
+ a:(cap_latin_word_pair/cap_latin_word) !(space_hard author_prefix_word space_hard author_word) {
513
517
  def value
514
518
  a.value
515
519
  end
@@ -555,7 +559,7 @@ grammar ScientificNameClean
555
559
  end
556
560
 
557
561
  rule uninomial_string
558
- cap_latin_word {
562
+ (cap_latin_word_pair/cap_latin_word) {
559
563
  def canonical
560
564
  value
561
565
  end
@@ -938,6 +942,14 @@ grammar ScientificNameClean
938
942
  }
939
943
  end
940
944
 
945
+ rule cap_latin_word_pair
946
+ a:cap_latin_word "-" b:cap_latin_word {
947
+ def value
948
+ a.value + b.value.downcase
949
+ end
950
+ }
951
+ end
952
+
941
953
  rule cap_latin_word
942
954
  a:([A-Z]/cap_digraph) b:latin_word "?" {
943
955
  def value
@@ -951,6 +963,12 @@ grammar ScientificNameClean
951
963
  end
952
964
  }
953
965
  /
966
+ a:("AE"/"OE") b:latin_word {
967
+ def value
968
+ a.text_value[0..0] + 'e' + b.value
969
+ end
970
+ }
971
+ /
954
972
  ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
955
973
  def value
956
974
  text_value
@@ -1041,42 +1059,32 @@ grammar ScientificNameClean
1041
1059
  end
1042
1060
 
1043
1061
  rule latin_word
1044
- a:[a-zëüäöïéåóç] b:full_name_letters {
1045
- def value
1046
- a.text_value + b.value
1047
- end
1048
- }
1049
- /
1050
- a:digraph b:full_name_letters {
1062
+ a:[a-zëæœ] b:valid_name_letters {
1051
1063
  def value
1052
- a.value + b.value
1064
+ l = a.text_value
1065
+ l = 'ae' if l == 'æ'
1066
+ l = 'oe' if l == 'œ'
1067
+ l + b.value
1053
1068
  end
1054
1069
  }
1055
1070
  end
1056
1071
 
1057
- rule full_name_letters
1058
- a:digraph b:full_name_letters {
1059
- def value
1060
- a.value + b.value
1061
- end
1062
- }
1063
- /
1064
- a:valid_name_letters b:digraph c:full_name_letters {
1065
- def value
1066
- a.value + b.value + c.value
1067
- end
1068
- }
1069
- /
1070
- valid_name_letters
1071
- end
1072
-
1073
1072
  rule valid_name_letters
1074
- [a-z\-ëüäöïéåóç]+ {
1073
+ [a-z\-ëæœ]+ {
1075
1074
  def value
1076
- text_value
1075
+ res = ''
1076
+ text_value.split('').each do |l|
1077
+ l = 'ae' if l == 'æ'
1078
+ l = 'oe' if l == 'œ'
1079
+ # not sure if we should normalize ë as well. It is legal in botanical code, but it
1080
+ # might be beneficial to normalize it for reconciliation purposes
1081
+ # l = 'e' if l == 'ë'
1082
+ res << l
1083
+ end
1084
+ res
1077
1085
  end
1078
1086
  }
1079
- end
1087
+ end
1080
1088
 
1081
1089
  rule cap_digraph
1082
1090
  "Æ" {
@@ -1092,20 +1100,6 @@ grammar ScientificNameClean
1092
1100
  }
1093
1101
  end
1094
1102
 
1095
- rule digraph
1096
- "æ" {
1097
- def value
1098
- 'ae'
1099
- end
1100
- }
1101
- /
1102
- "œ" {
1103
- def value
1104
- 'oe'
1105
- end
1106
- }
1107
- end
1108
-
1109
1103
  rule year
1110
1104
  b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
1111
1105
  def value
@@ -1177,9 +1171,9 @@ grammar ScientificNameClean
1177
1171
  end
1178
1172
 
1179
1173
  rule multiplication_sign
1180
- "×" {
1174
+ ("×"/"*") {
1181
1175
  def value
1182
- text_value
1176
+ "×"
1183
1177
  end
1184
1178
  }
1185
1179
  end
@@ -8,15 +8,76 @@ module ScientificNameDirty
8
8
 
9
9
  include ScientificNameClean
10
10
 
11
+ module Root0
12
+ def space1
13
+ elements[0]
14
+ end
15
+
16
+ def a
17
+ elements[1]
18
+ end
19
+
20
+ def space2
21
+ elements[2]
22
+ end
23
+ end
24
+
25
+ module Root1
26
+ def value
27
+ a.value.gsub(/\s{2,}/, ' ').strip
28
+ end
29
+
30
+ def canonical
31
+ a.canonical.gsub(/\s{2,}/, ' ').strip
32
+ end
33
+
34
+ def pos
35
+ a.pos
36
+ end
37
+
38
+ def hybrid
39
+ a.hybrid
40
+ end
41
+
42
+ def details
43
+ a.details.class == Array ? a.details : [a.details]
44
+ end
45
+
46
+ def parser_run
47
+ 2
48
+ end
49
+ end
50
+
11
51
  def _nt_root
12
52
  start_index = index
13
53
  if node_cache[:root].has_key?(index)
14
54
  cached = node_cache[:root][index]
15
- @index = cached.interval.end if cached
55
+ if cached
56
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
57
+ @index = cached.interval.end
58
+ end
16
59
  return cached
17
60
  end
18
61
 
19
- r0 = super
62
+ i0, s0 = index, []
63
+ r1 = _nt_space
64
+ s0 << r1
65
+ if r1
66
+ r2 = _nt_scientific_name_5
67
+ s0 << r2
68
+ if r2
69
+ r3 = _nt_space
70
+ s0 << r3
71
+ end
72
+ end
73
+ if s0.last
74
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
75
+ r0.extend(Root0)
76
+ r0.extend(Root1)
77
+ else
78
+ @index = i0
79
+ r0 = nil
80
+ end
20
81
 
21
82
  node_cache[:root][start_index] = r0
22
83
 
@@ -55,7 +116,10 @@ module ScientificNameDirty
55
116
  start_index = index
56
117
  if node_cache[:scientific_name_5].has_key?(index)
57
118
  cached = node_cache[:scientific_name_5][index]
58
- @index = cached.interval.end if cached
119
+ if cached
120
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
121
+ @index = cached.interval.end
122
+ end
59
123
  return cached
60
124
  end
61
125
 
@@ -129,7 +193,7 @@ module ScientificNameDirty
129
193
  elements[0]
130
194
  end
131
195
 
132
- def space
196
+ def space1
133
197
  elements[1]
134
198
  end
135
199
 
@@ -137,7 +201,7 @@ module ScientificNameDirty
137
201
  elements[2]
138
202
  end
139
203
 
140
- def space
204
+ def space2
141
205
  elements[3]
142
206
  end
143
207
 
@@ -168,7 +232,10 @@ module ScientificNameDirty
168
232
  start_index = index
169
233
  if node_cache[:infraspecies].has_key?(index)
170
234
  cached = node_cache[:infraspecies][index]
171
- @index = cached.interval.end if cached
235
+ if cached
236
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
237
+ @index = cached.interval.end
238
+ end
172
239
  return cached
173
240
  end
174
241
 
@@ -276,7 +343,10 @@ module ScientificNameDirty
276
343
  start_index = index
277
344
  if node_cache[:species].has_key?(index)
278
345
  cached = node_cache[:species][index]
279
- @index = cached.interval.end if cached
346
+ if cached
347
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
348
+ @index = cached.interval.end
349
+ end
280
350
  return cached
281
351
  end
282
352
 
@@ -317,6 +387,161 @@ module ScientificNameDirty
317
387
  r0
318
388
  end
319
389
 
390
+ module LatinWord0
391
+ def a
392
+ elements[0]
393
+ end
394
+
395
+ def b
396
+ elements[1]
397
+ end
398
+ end
399
+
400
+ module LatinWord1
401
+ def value
402
+ res = ''
403
+ text_value.split('').each do |l|
404
+ l = 'ae' if l == 'æ'
405
+ l = 'oe' if l == 'œ'
406
+ res << l
407
+ end
408
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
409
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
410
+ end
411
+ end
412
+
413
+ def _nt_latin_word
414
+ start_index = index
415
+ if node_cache[:latin_word].has_key?(index)
416
+ cached = node_cache[:latin_word][index]
417
+ if cached
418
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
419
+ @index = cached.interval.end
420
+ end
421
+ return cached
422
+ end
423
+
424
+ i0, s0 = index, []
425
+ if has_terminal?('\G[a-z\\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž]', true, index)
426
+ r1 = true
427
+ @index += 1
428
+ else
429
+ r1 = nil
430
+ end
431
+ s0 << r1
432
+ if r1
433
+ r2 = _nt_valid_name_letters
434
+ s0 << r2
435
+ end
436
+ if s0.last
437
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
438
+ r0.extend(LatinWord0)
439
+ r0.extend(LatinWord1)
440
+ else
441
+ @index = i0
442
+ r0 = nil
443
+ end
444
+
445
+ node_cache[:latin_word][start_index] = r0
446
+
447
+ r0
448
+ end
449
+
450
+ module ValidNameLetters0
451
+ def value
452
+ res = ''
453
+ text_value.split('').each do |l|
454
+ l = 'ae' if l == 'æ'
455
+ l = 'oe' if l == 'œ'
456
+ res << l
457
+ end
458
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
459
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
460
+ end
461
+ end
462
+
463
+ def _nt_valid_name_letters
464
+ start_index = index
465
+ if node_cache[:valid_name_letters].has_key?(index)
466
+ cached = node_cache[:valid_name_letters][index]
467
+ if cached
468
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
469
+ @index = cached.interval.end
470
+ end
471
+ return cached
472
+ end
473
+
474
+ s0, i0 = [], index
475
+ loop do
476
+ if has_terminal?('\G[a-z\\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž]', true, index)
477
+ r1 = true
478
+ @index += 1
479
+ else
480
+ r1 = nil
481
+ end
482
+ if r1
483
+ s0 << r1
484
+ else
485
+ break
486
+ end
487
+ end
488
+ if s0.empty?
489
+ @index = i0
490
+ r0 = nil
491
+ else
492
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
493
+ r0.extend(ValidNameLetters0)
494
+ end
495
+
496
+ node_cache[:valid_name_letters][start_index] = r0
497
+
498
+ r0
499
+ end
500
+
501
+ module ValidNameLetters0
502
+ def value
503
+ text_value
504
+ end
505
+ end
506
+
507
+ def _nt_valid_name_letters
508
+ start_index = index
509
+ if node_cache[:valid_name_letters].has_key?(index)
510
+ cached = node_cache[:valid_name_letters][index]
511
+ if cached
512
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
513
+ @index = cached.interval.end
514
+ end
515
+ return cached
516
+ end
517
+
518
+ s0, i0 = [], index
519
+ loop do
520
+ if has_terminal?('\G[a-z\\-ëüäöïéåóç]', true, index)
521
+ r1 = true
522
+ @index += 1
523
+ else
524
+ r1 = nil
525
+ end
526
+ if r1
527
+ s0 << r1
528
+ else
529
+ break
530
+ end
531
+ end
532
+ if s0.empty?
533
+ @index = i0
534
+ r0 = nil
535
+ else
536
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
537
+ r0.extend(ValidNameLetters0)
538
+ end
539
+
540
+ node_cache[:valid_name_letters][start_index] = r0
541
+
542
+ r0
543
+ end
544
+
320
545
  module RightParen0
321
546
  def space
322
547
  elements[1]
@@ -328,7 +553,10 @@ module ScientificNameDirty
328
553
  start_index = index
329
554
  if node_cache[:right_paren].has_key?(index)
330
555
  cached = node_cache[:right_paren][index]
331
- @index = cached.interval.end if cached
556
+ if cached
557
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
558
+ @index = cached.interval.end
559
+ end
332
560
  return cached
333
561
  end
334
562
 
@@ -391,7 +619,10 @@ module ScientificNameDirty
391
619
  start_index = index
392
620
  if node_cache[:left_paren].has_key?(index)
393
621
  cached = node_cache[:left_paren][index]
394
- @index = cached.interval.end if cached
622
+ if cached
623
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
624
+ @index = cached.interval.end
625
+ end
395
626
  return cached
396
627
  end
397
628
 
@@ -503,7 +734,10 @@ module ScientificNameDirty
503
734
  start_index = index
504
735
  if node_cache[:year].has_key?(index)
505
736
  cached = node_cache[:year][index]
506
- @index = cached.interval.end if cached
737
+ if cached
738
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
739
+ @index = cached.interval.end
740
+ end
507
741
  return cached
508
742
  end
509
743
 
@@ -583,7 +817,7 @@ module ScientificNameDirty
583
817
  end
584
818
 
585
819
  module ApproximateYear0
586
- def space
820
+ def space1
587
821
  elements[1]
588
822
  end
589
823
 
@@ -591,7 +825,7 @@ module ScientificNameDirty
591
825
  elements[2]
592
826
  end
593
827
 
594
- def space
828
+ def space2
595
829
  elements[3]
596
830
  end
597
831
 
@@ -615,7 +849,10 @@ module ScientificNameDirty
615
849
  start_index = index
616
850
  if node_cache[:approximate_year].has_key?(index)
617
851
  cached = node_cache[:approximate_year][index]
618
- @index = cached.interval.end if cached
852
+ if cached
853
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
854
+ @index = cached.interval.end
855
+ end
619
856
  return cached
620
857
  end
621
858
 
@@ -703,7 +940,10 @@ module ScientificNameDirty
703
940
  start_index = index
704
941
  if node_cache[:double_year].has_key?(index)
705
942
  cached = node_cache[:double_year][index]
706
- @index = cached.interval.end if cached
943
+ if cached
944
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
945
+ @index = cached.interval.end
946
+ end
707
947
  return cached
708
948
  end
709
949
 
@@ -723,7 +963,7 @@ module ScientificNameDirty
723
963
  s3, i3 = [], index
724
964
  loop do
725
965
  if has_terminal?('\G[0-9]', true, index)
726
- r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
966
+ r4 = true
727
967
  @index += 1
728
968
  else
729
969
  r4 = nil
@@ -743,7 +983,7 @@ module ScientificNameDirty
743
983
  s0 << r3
744
984
  if r3
745
985
  if has_terminal?('\G[A-Za-z]', true, index)
746
- r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
986
+ r6 = true
747
987
  @index += 1
748
988
  else
749
989
  r6 = nil
@@ -756,7 +996,7 @@ module ScientificNameDirty
756
996
  s0 << r5
757
997
  if r5
758
998
  if has_terminal?('\G[\\?]', true, index)
759
- r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
999
+ r8 = true
760
1000
  @index += 1
761
1001
  else
762
1002
  r8 = nil
@@ -810,7 +1050,10 @@ module ScientificNameDirty
810
1050
  start_index = index
811
1051
  if node_cache[:year_number_with_punctuation].has_key?(index)
812
1052
  cached = node_cache[:year_number_with_punctuation][index]
813
- @index = cached.interval.end if cached
1053
+ if cached
1054
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
1055
+ @index = cached.interval.end
1056
+ end
814
1057
  return cached
815
1058
  end
816
1059
 
@@ -857,7 +1100,10 @@ module ScientificNameDirty
857
1100
  start_index = index
858
1101
  if node_cache[:page_number].has_key?(index)
859
1102
  cached = node_cache[:page_number][index]
860
- @index = cached.interval.end if cached
1103
+ if cached
1104
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
1105
+ @index = cached.interval.end
1106
+ end
861
1107
  return cached
862
1108
  end
863
1109
 
@@ -877,7 +1123,7 @@ module ScientificNameDirty
877
1123
  s3, i3 = [], index
878
1124
  loop do
879
1125
  if has_terminal?('\G[\\d]', true, index)
880
- r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
1126
+ r4 = true
881
1127
  @index += 1
882
1128
  else
883
1129
  r4 = nil
@@ -915,7 +1161,10 @@ module ScientificNameDirty
915
1161
  start_index = index
916
1162
  if node_cache[:string_authorship_inconsistencies].has_key?(index)
917
1163
  cached = node_cache[:string_authorship_inconsistencies][index]
918
- @index = cached.interval.end if cached
1164
+ if cached
1165
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
1166
+ @index = cached.interval.end
1167
+ end
919
1168
  return cached
920
1169
  end
921
1170
 
@@ -933,11 +1182,11 @@ module ScientificNameDirty
933
1182
  end
934
1183
 
935
1184
  module Garbage0
936
- def space
1185
+ def space1
937
1186
  elements[0]
938
1187
  end
939
1188
 
940
- def space
1189
+ def space2
941
1190
  elements[2]
942
1191
  end
943
1192
 
@@ -954,7 +1203,10 @@ module ScientificNameDirty
954
1203
  start_index = index
955
1204
  if node_cache[:garbage].has_key?(index)
956
1205
  cached = node_cache[:garbage][index]
957
- @index = cached.interval.end if cached
1206
+ if cached
1207
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
1208
+ @index = cached.interval.end
1209
+ end
958
1210
  return cached
959
1211
  end
960
1212
 
@@ -964,7 +1216,7 @@ module ScientificNameDirty
964
1216
  s1 << r2
965
1217
  if r2
966
1218
  if has_terminal?('\G["\',.]', true, index)
967
- r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
1219
+ r3 = true
968
1220
  @index += 1
969
1221
  else
970
1222
  r3 = nil
@@ -977,7 +1229,7 @@ module ScientificNameDirty
977
1229
  s5, i5 = [], index
978
1230
  loop do
979
1231
  if has_terminal?('\G[^щ]', true, index)
980
- r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
1232
+ r6 = true
981
1233
  @index += 1
982
1234
  else
983
1235
  r6 = nil
@@ -1010,7 +1262,7 @@ module ScientificNameDirty
1010
1262
  s9, i9 = [], index
1011
1263
  loop do
1012
1264
  if has_terminal?('\G[^ш]', true, index)
1013
- r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
1265
+ r10 = true
1014
1266
  @index += 1
1015
1267
  else
1016
1268
  r10 = nil
@@ -1054,3 +1306,4 @@ end
1054
1306
  class ScientificNameDirtyParser < Treetop::Runtime::CompiledParser
1055
1307
  include ScientificNameDirty
1056
1308
  end
1309
+