dimus-biodiversity 0.0.13 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/biodiversity/parser.rb +35 -2
- data/lib/biodiversity/parser/scientific_name_canonical.rb +10 -0
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +10 -0
- data/lib/biodiversity/parser/scientific_name_clean.rb +404 -132
- data/lib/biodiversity/parser/scientific_name_clean.treetop +218 -11
- data/lib/biodiversity/parser/scientific_name_dirty.rb +18 -0
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +18 -0
- data/spec/parser/scientific_name.spec.rb +13 -1
- data/spec/parser/scientific_name_canonical.spec.rb +7 -2
- data/spec/parser/scientific_name_clean.spec.rb +206 -122
- data/spec/parser/scientific_name_dirty.spec.rb +32 -16
- metadata +1 -1
data/lib/biodiversity/parser.rb
CHANGED
@@ -3,18 +3,51 @@ dir = File.dirname(__FILE__)
|
|
3
3
|
require File.join(dir, *%w[parser scientific_name_clean])
|
4
4
|
require File.join(dir, *%w[parser scientific_name_dirty])
|
5
5
|
require File.join(dir, *%w[parser scientific_name_canonical])
|
6
|
-
|
6
|
+
require 'rubygems'
|
7
|
+
require 'json'
|
7
8
|
|
8
9
|
class ScientificNameParser
|
9
10
|
|
10
11
|
def initialize
|
12
|
+
@verbatim = ''
|
11
13
|
@clean = ScientificNameCleanParser.new
|
12
14
|
@dirty = ScientificNameDirtyParser.new
|
13
15
|
@canonical = ScientificNameCanonicalParser.new
|
16
|
+
@node = nil
|
14
17
|
end
|
15
18
|
|
16
19
|
def parse(a_string)
|
17
|
-
@
|
20
|
+
@verbatim = a_string
|
21
|
+
@node = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) rescue nil
|
22
|
+
self
|
23
|
+
end
|
24
|
+
|
25
|
+
def pos
|
26
|
+
@node.pos
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_json
|
30
|
+
parsed = !!@node
|
31
|
+
if parsed
|
32
|
+
res = {
|
33
|
+
:parsed => parsed,
|
34
|
+
:verbatim => self.text_value }
|
35
|
+
if parsed
|
36
|
+
res.merge!({
|
37
|
+
:normalized => self.value,
|
38
|
+
:canonical => self.canonical
|
39
|
+
})
|
40
|
+
res.merge!(self.details)
|
41
|
+
end
|
42
|
+
res = {:scientificName => res}
|
43
|
+
JSON.generate res
|
44
|
+
else
|
45
|
+
JSON.generate({:parsed => parsed, :verbatim => @verbatim})
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def pos_to_json
|
50
|
+
JSON.generate @node.pos rescue ''
|
18
51
|
end
|
19
52
|
|
20
53
|
end
|
@@ -46,6 +46,11 @@ module ScientificNameCanonical
|
|
46
46
|
def canonical
|
47
47
|
a.canonical
|
48
48
|
end
|
49
|
+
|
50
|
+
def pos
|
51
|
+
a.pos
|
52
|
+
end
|
53
|
+
|
49
54
|
def details
|
50
55
|
a.details.merge(:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value)
|
51
56
|
end
|
@@ -72,6 +77,11 @@ module ScientificNameCanonical
|
|
72
77
|
def canonical
|
73
78
|
a.canonical
|
74
79
|
end
|
80
|
+
|
81
|
+
def pos
|
82
|
+
a.pos
|
83
|
+
end
|
84
|
+
|
75
85
|
def details
|
76
86
|
a.details.merge(:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value)
|
77
87
|
end
|
@@ -15,6 +15,11 @@ grammar ScientificNameCanonical
|
|
15
15
|
def canonical
|
16
16
|
a.canonical
|
17
17
|
end
|
18
|
+
|
19
|
+
def pos
|
20
|
+
a.pos
|
21
|
+
end
|
22
|
+
|
18
23
|
def details
|
19
24
|
a.details.merge(:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value)
|
20
25
|
end
|
@@ -27,6 +32,11 @@ grammar ScientificNameCanonical
|
|
27
32
|
def canonical
|
28
33
|
a.canonical
|
29
34
|
end
|
35
|
+
|
36
|
+
def pos
|
37
|
+
a.pos
|
38
|
+
end
|
39
|
+
|
30
40
|
def details
|
31
41
|
a.details.merge(:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value)
|
32
42
|
end
|
@@ -39,6 +39,11 @@ module ScientificNameClean
|
|
39
39
|
def canonical
|
40
40
|
a.canonical + " × " + b.canonical
|
41
41
|
end
|
42
|
+
|
43
|
+
def pos
|
44
|
+
a.pos.merge(b.pos)
|
45
|
+
end
|
46
|
+
|
42
47
|
def details
|
43
48
|
{:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}}
|
44
49
|
end
|
@@ -72,6 +77,10 @@ module ScientificNameClean
|
|
72
77
|
a.canonical
|
73
78
|
end
|
74
79
|
|
80
|
+
def pos
|
81
|
+
a.pos
|
82
|
+
end
|
83
|
+
|
75
84
|
def details
|
76
85
|
{:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}}
|
77
86
|
end
|
@@ -221,6 +230,10 @@ module ScientificNameClean
|
|
221
230
|
def canonical
|
222
231
|
a.canonical
|
223
232
|
end
|
233
|
+
|
234
|
+
def pos
|
235
|
+
a.pos.merge(b.pos).merge(d.pos)
|
236
|
+
end
|
224
237
|
|
225
238
|
def details
|
226
239
|
a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip})
|
@@ -265,6 +278,10 @@ module ScientificNameClean
|
|
265
278
|
def canonical
|
266
279
|
a.canonical
|
267
280
|
end
|
281
|
+
|
282
|
+
def pos
|
283
|
+
a.pos.merge(c.pos)
|
284
|
+
end
|
268
285
|
|
269
286
|
def details
|
270
287
|
a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
|
@@ -308,6 +325,11 @@ module ScientificNameClean
|
|
308
325
|
def canonical
|
309
326
|
a.canonical
|
310
327
|
end
|
328
|
+
|
329
|
+
def pos
|
330
|
+
a.pos.merge(b.pos)
|
331
|
+
end
|
332
|
+
|
311
333
|
def details
|
312
334
|
a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
|
313
335
|
end
|
@@ -342,6 +364,11 @@ module ScientificNameClean
|
|
342
364
|
def canonical
|
343
365
|
a.canonical
|
344
366
|
end
|
367
|
+
|
368
|
+
def pos
|
369
|
+
a.pos.merge(b.pos)
|
370
|
+
end
|
371
|
+
|
345
372
|
def details
|
346
373
|
a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
|
347
374
|
end
|
@@ -378,6 +405,10 @@ module ScientificNameClean
|
|
378
405
|
a.canonical
|
379
406
|
end
|
380
407
|
|
408
|
+
def pos
|
409
|
+
a.pos.merge(b.pos)
|
410
|
+
end
|
411
|
+
|
381
412
|
def details
|
382
413
|
a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
|
383
414
|
end
|
@@ -764,6 +795,11 @@ module ScientificNameClean
|
|
764
795
|
def canonical
|
765
796
|
(a.canonical + " " + c.canonical).gsub(/\s+/,' ')
|
766
797
|
end
|
798
|
+
|
799
|
+
def pos
|
800
|
+
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
801
|
+
end
|
802
|
+
|
767
803
|
def details
|
768
804
|
a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')})
|
769
805
|
end
|
@@ -908,7 +944,11 @@ module ScientificNameClean
|
|
908
944
|
def value
|
909
945
|
a.value + " " + b.value
|
910
946
|
end
|
911
|
-
|
947
|
+
|
948
|
+
def pos
|
949
|
+
a.pos.merge(b.pos)
|
950
|
+
end
|
951
|
+
|
912
952
|
def details
|
913
953
|
a.details.merge(b.details)
|
914
954
|
end
|
@@ -941,6 +981,10 @@ module ScientificNameClean
|
|
941
981
|
a.value + " ex " + b.value
|
942
982
|
end
|
943
983
|
|
984
|
+
def pos
|
985
|
+
a.pos.merge(b.pos)
|
986
|
+
end
|
987
|
+
|
944
988
|
def details
|
945
989
|
{:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
|
946
990
|
end
|
@@ -964,6 +1008,11 @@ module ScientificNameClean
|
|
964
1008
|
def value
|
965
1009
|
a.value + " " + b.value
|
966
1010
|
end
|
1011
|
+
|
1012
|
+
def pos
|
1013
|
+
a.pos.merge(b.pos)
|
1014
|
+
end
|
1015
|
+
|
967
1016
|
def details
|
968
1017
|
a.details.merge(b.details)
|
969
1018
|
end
|
@@ -1096,6 +1145,11 @@ module ScientificNameClean
|
|
1096
1145
|
def value
|
1097
1146
|
a.value + " " + b.value
|
1098
1147
|
end
|
1148
|
+
|
1149
|
+
def pos
|
1150
|
+
a.pos.merge(b.pos)
|
1151
|
+
end
|
1152
|
+
|
1099
1153
|
def details
|
1100
1154
|
a.details.merge(b.details)
|
1101
1155
|
end
|
@@ -1189,6 +1243,11 @@ module ScientificNameClean
|
|
1189
1243
|
def value
|
1190
1244
|
"(" + a.value + " " + b.value + ")"
|
1191
1245
|
end
|
1246
|
+
|
1247
|
+
def pos
|
1248
|
+
a.pos.merge(b.pos)
|
1249
|
+
end
|
1250
|
+
|
1192
1251
|
def details
|
1193
1252
|
{:orig_authors => a.details[:authors], :year => b.details[:year]}
|
1194
1253
|
end
|
@@ -1220,6 +1279,11 @@ module ScientificNameClean
|
|
1220
1279
|
def value
|
1221
1280
|
"(" + a.value + ")"
|
1222
1281
|
end
|
1282
|
+
|
1283
|
+
def pos
|
1284
|
+
a.pos
|
1285
|
+
end
|
1286
|
+
|
1223
1287
|
def details
|
1224
1288
|
{:orig_authors => a.details[:authors]}
|
1225
1289
|
end
|
@@ -1244,6 +1308,11 @@ module ScientificNameClean
|
|
1244
1308
|
def value
|
1245
1309
|
"(" + a.value + ")"
|
1246
1310
|
end
|
1311
|
+
|
1312
|
+
def pos
|
1313
|
+
a.pos
|
1314
|
+
end
|
1315
|
+
|
1247
1316
|
def details
|
1248
1317
|
{:orig_authors => a.details[:authors]}
|
1249
1318
|
end
|
@@ -1275,6 +1344,11 @@ module ScientificNameClean
|
|
1275
1344
|
def value
|
1276
1345
|
"(" + a.value + ")"
|
1277
1346
|
end
|
1347
|
+
|
1348
|
+
def pos
|
1349
|
+
a.pos
|
1350
|
+
end
|
1351
|
+
|
1278
1352
|
def details
|
1279
1353
|
{:orig_authors => a.details[:authors]}
|
1280
1354
|
end
|
@@ -1550,6 +1624,10 @@ module ScientificNameClean
|
|
1550
1624
|
"(" + a.value + ")"
|
1551
1625
|
end
|
1552
1626
|
|
1627
|
+
def pos
|
1628
|
+
a.pos
|
1629
|
+
end
|
1630
|
+
|
1553
1631
|
def details
|
1554
1632
|
{:original_revised_name_authors => a.details[:revised_name_authors]}
|
1555
1633
|
end
|
@@ -1622,6 +1700,11 @@ module ScientificNameClean
|
|
1622
1700
|
def value
|
1623
1701
|
a.value + " ex " + b.value
|
1624
1702
|
end
|
1703
|
+
|
1704
|
+
def pos
|
1705
|
+
a.pos.merge(b.pos)
|
1706
|
+
end
|
1707
|
+
|
1625
1708
|
def details
|
1626
1709
|
{:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
|
1627
1710
|
end
|
@@ -1690,6 +1773,11 @@ module ScientificNameClean
|
|
1690
1773
|
def value
|
1691
1774
|
a.value + " " + b.value
|
1692
1775
|
end
|
1776
|
+
|
1777
|
+
def pos
|
1778
|
+
a.pos.merge(b.pos)
|
1779
|
+
end
|
1780
|
+
|
1693
1781
|
def details
|
1694
1782
|
{:authors => {:names => a.details[:authors][:names]}.merge(b.details)}
|
1695
1783
|
end
|
@@ -1767,6 +1855,11 @@ module ScientificNameClean
|
|
1767
1855
|
def value
|
1768
1856
|
text_value
|
1769
1857
|
end
|
1858
|
+
|
1859
|
+
def pos
|
1860
|
+
{interval.begin => ['unknown_author', interval.end]}
|
1861
|
+
end
|
1862
|
+
|
1770
1863
|
def details
|
1771
1864
|
{:authors => "unknown"}
|
1772
1865
|
end
|
@@ -1903,6 +1996,10 @@ module ScientificNameClean
|
|
1903
1996
|
sep.apply(a,b)
|
1904
1997
|
end
|
1905
1998
|
|
1999
|
+
def pos
|
2000
|
+
sep.pos(a,b)
|
2001
|
+
end
|
2002
|
+
|
1906
2003
|
def details
|
1907
2004
|
sep.details(a,b)
|
1908
2005
|
end
|
@@ -1967,7 +2064,11 @@ module ScientificNameClean
|
|
1967
2064
|
sep = " et" if ["&","and","et"].include? sep
|
1968
2065
|
a.value + sep + " " + b.value
|
1969
2066
|
end
|
1970
|
-
|
2067
|
+
|
2068
|
+
def pos(a,b)
|
2069
|
+
a.pos.merge(b.pos)
|
2070
|
+
end
|
2071
|
+
|
1971
2072
|
def details(a,b)
|
1972
2073
|
{:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}}
|
1973
2074
|
end
|
@@ -2064,7 +2165,11 @@ module ScientificNameClean
|
|
2064
2165
|
def value
|
2065
2166
|
a.value + " " + b.value
|
2066
2167
|
end
|
2067
|
-
|
2168
|
+
|
2169
|
+
def pos
|
2170
|
+
a.pos.merge(b.pos)
|
2171
|
+
end
|
2172
|
+
|
2068
2173
|
def details
|
2069
2174
|
{:authors => {:names => [value]}}
|
2070
2175
|
end
|
@@ -2127,6 +2232,11 @@ module ScientificNameClean
|
|
2127
2232
|
def value
|
2128
2233
|
text_value.strip
|
2129
2234
|
end
|
2235
|
+
|
2236
|
+
def pos
|
2237
|
+
{interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
|
2238
|
+
end
|
2239
|
+
|
2130
2240
|
def details
|
2131
2241
|
{:authors => {:names => [value]}}
|
2132
2242
|
end
|
@@ -2136,6 +2246,12 @@ module ScientificNameClean
|
|
2136
2246
|
def value
|
2137
2247
|
text_value.strip
|
2138
2248
|
end
|
2249
|
+
|
2250
|
+
def pos
|
2251
|
+
#cheating because there are several words in some of them
|
2252
|
+
{interval.begin => ['author_word', interval.end]}
|
2253
|
+
end
|
2254
|
+
|
2139
2255
|
def details
|
2140
2256
|
{:authors => {:names => [value]}}
|
2141
2257
|
end
|
@@ -2148,6 +2264,11 @@ module ScientificNameClean
|
|
2148
2264
|
def value
|
2149
2265
|
text_value.gsub(/\s+/, " ").strip
|
2150
2266
|
end
|
2267
|
+
|
2268
|
+
def pos
|
2269
|
+
{interval.begin => ['author_word', interval.end]}
|
2270
|
+
end
|
2271
|
+
|
2151
2272
|
def details
|
2152
2273
|
{:authors => {:names => [value]}}
|
2153
2274
|
end
|
@@ -2572,6 +2693,11 @@ module ScientificNameClean
|
|
2572
2693
|
def canonical
|
2573
2694
|
a.canonical
|
2574
2695
|
end
|
2696
|
+
|
2697
|
+
def pos
|
2698
|
+
a.pos
|
2699
|
+
end
|
2700
|
+
|
2575
2701
|
def details
|
2576
2702
|
a.details.merge(b.details).merge(c.details)
|
2577
2703
|
end
|
@@ -2603,6 +2729,10 @@ module ScientificNameClean
|
|
2603
2729
|
a.canonical + b.canonical
|
2604
2730
|
end
|
2605
2731
|
|
2732
|
+
def pos
|
2733
|
+
a.pos.merge(b.pos)
|
2734
|
+
end
|
2735
|
+
|
2606
2736
|
def details
|
2607
2737
|
a.details.merge(b.details)
|
2608
2738
|
end
|
@@ -2636,6 +2766,10 @@ module ScientificNameClean
|
|
2636
2766
|
value
|
2637
2767
|
end
|
2638
2768
|
|
2769
|
+
def pos
|
2770
|
+
a.pos.merge({b.interval.begin => ['subspecies', b.interval.end]})
|
2771
|
+
end
|
2772
|
+
|
2639
2773
|
def details
|
2640
2774
|
a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
|
2641
2775
|
end
|
@@ -2796,6 +2930,10 @@ module ScientificNameClean
|
|
2796
2930
|
a.canonical + b.canonical
|
2797
2931
|
end
|
2798
2932
|
|
2933
|
+
def pos
|
2934
|
+
a.pos.merge(b.pos)
|
2935
|
+
end
|
2936
|
+
|
2799
2937
|
def details
|
2800
2938
|
c = a.details[:subspecies] + b.details_subspecies
|
2801
2939
|
a.details.merge({:subspecies => c, :is_valid => false})
|
@@ -2868,6 +3006,10 @@ module ScientificNameClean
|
|
2868
3006
|
def canonical
|
2869
3007
|
sel.canonical(a)
|
2870
3008
|
end
|
3009
|
+
|
3010
|
+
def pos
|
3011
|
+
{a.interval.begin => ['subspecies', a.interval.end]}
|
3012
|
+
end
|
2871
3013
|
def details
|
2872
3014
|
sel.details(a)
|
2873
3015
|
end
|
@@ -3633,6 +3775,11 @@ module ScientificNameClean
|
|
3633
3775
|
def canonical
|
3634
3776
|
a.value + " " + b.value
|
3635
3777
|
end
|
3778
|
+
|
3779
|
+
def pos
|
3780
|
+
{a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
|
3781
|
+
end
|
3782
|
+
|
3636
3783
|
def details
|
3637
3784
|
{:genus => a.value, :species => b.value, :cross => 'before'}
|
3638
3785
|
end
|
@@ -3659,6 +3806,11 @@ module ScientificNameClean
|
|
3659
3806
|
def canonical
|
3660
3807
|
a.value
|
3661
3808
|
end
|
3809
|
+
|
3810
|
+
def pos
|
3811
|
+
{a.interval.begin => ['uninomial', a.interval.end]}
|
3812
|
+
end
|
3813
|
+
|
3662
3814
|
def details
|
3663
3815
|
{:uninomial => a.value, :cross => 'before'}
|
3664
3816
|
end
|
@@ -3693,6 +3845,11 @@ module ScientificNameClean
|
|
3693
3845
|
def canonical
|
3694
3846
|
a.value + " " + b.value
|
3695
3847
|
end
|
3848
|
+
|
3849
|
+
def pos
|
3850
|
+
{a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
|
3851
|
+
end
|
3852
|
+
|
3696
3853
|
def details
|
3697
3854
|
{:genus => a.value, :species => b.value, :cross => 'inside'}
|
3698
3855
|
end
|
@@ -3727,6 +3884,11 @@ module ScientificNameClean
|
|
3727
3884
|
def canonical
|
3728
3885
|
a.value + " " + c.value
|
3729
3886
|
end
|
3887
|
+
|
3888
|
+
def pos
|
3889
|
+
{a.interval.begin => ['genus', a.interval.end]}.merge(b.pos).merge({c.interval.begin => ['subspecies', c.interval.end]})
|
3890
|
+
end
|
3891
|
+
|
3730
3892
|
def details
|
3731
3893
|
{:genus => a.value, :subgenus => b.details, :species => c.value}
|
3732
3894
|
end
|
@@ -3754,6 +3916,10 @@ module ScientificNameClean
|
|
3754
3916
|
value
|
3755
3917
|
end
|
3756
3918
|
|
3919
|
+
def pos
|
3920
|
+
{a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
|
3921
|
+
end
|
3922
|
+
|
3757
3923
|
def details
|
3758
3924
|
{:genus => a.value, :species => b.value}
|
3759
3925
|
end
|
@@ -3934,6 +4100,11 @@ module ScientificNameClean
|
|
3934
4100
|
def value
|
3935
4101
|
"(" + a.value + ")"
|
3936
4102
|
end
|
4103
|
+
|
4104
|
+
def pos
|
4105
|
+
{a.interval.begin => ['subgenus', a.interval.end]}
|
4106
|
+
end
|
4107
|
+
|
3937
4108
|
def details
|
3938
4109
|
a.value
|
3939
4110
|
end
|
@@ -4119,13 +4290,17 @@ module ScientificNameClean
|
|
4119
4290
|
|
4120
4291
|
module CapLatinWord1
|
4121
4292
|
def value
|
4122
|
-
a.text_value + b.value
|
4293
|
+
(a.value rescue a.text_value) + b.value
|
4123
4294
|
end
|
4124
4295
|
|
4125
4296
|
def canonical
|
4126
4297
|
value
|
4127
4298
|
end
|
4128
4299
|
|
4300
|
+
def pos
|
4301
|
+
{a.interval.begin => ['uninomial', a.interval.end]}
|
4302
|
+
end
|
4303
|
+
|
4129
4304
|
def details
|
4130
4305
|
{:uninomial => value}
|
4131
4306
|
end
|
@@ -4143,13 +4318,17 @@ module ScientificNameClean
|
|
4143
4318
|
|
4144
4319
|
module CapLatinWord3
|
4145
4320
|
def value
|
4146
|
-
a.text_value + b.value
|
4321
|
+
(a.value rescue a.text_value) + b.value
|
4147
4322
|
end
|
4148
4323
|
|
4149
4324
|
def canonical
|
4150
4325
|
value
|
4151
4326
|
end
|
4152
4327
|
|
4328
|
+
def pos
|
4329
|
+
{a.interval.begin => ['uninomial',b.interval.end]}
|
4330
|
+
end
|
4331
|
+
|
4153
4332
|
def details
|
4154
4333
|
{:uninomial => value}
|
4155
4334
|
end
|
@@ -4164,6 +4343,10 @@ module ScientificNameClean
|
|
4164
4343
|
value
|
4165
4344
|
end
|
4166
4345
|
|
4346
|
+
def pos
|
4347
|
+
{interval.begin => ['uninomial', interval.end]}
|
4348
|
+
end
|
4349
|
+
|
4167
4350
|
def details
|
4168
4351
|
{:uninomial => value}
|
4169
4352
|
end
|
@@ -4179,25 +4362,37 @@ module ScientificNameClean
|
|
4179
4362
|
|
4180
4363
|
i0 = index
|
4181
4364
|
i1, s1 = index, []
|
4182
|
-
|
4183
|
-
|
4365
|
+
i2 = index
|
4366
|
+
if input.index(Regexp.new('[A-Z]'), index) == index
|
4367
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4184
4368
|
@index += 1
|
4185
4369
|
else
|
4186
|
-
|
4370
|
+
r3 = nil
|
4371
|
+
end
|
4372
|
+
if r3
|
4373
|
+
r2 = r3
|
4374
|
+
else
|
4375
|
+
r4 = _nt_cap_digraph
|
4376
|
+
if r4
|
4377
|
+
r2 = r4
|
4378
|
+
else
|
4379
|
+
self.index = i2
|
4380
|
+
r2 = nil
|
4381
|
+
end
|
4187
4382
|
end
|
4188
4383
|
s1 << r2
|
4189
4384
|
if r2
|
4190
|
-
|
4191
|
-
s1 <<
|
4192
|
-
if
|
4385
|
+
r5 = _nt_latin_word
|
4386
|
+
s1 << r5
|
4387
|
+
if r5
|
4193
4388
|
if input.index("?", index) == index
|
4194
|
-
|
4389
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4195
4390
|
@index += 1
|
4196
4391
|
else
|
4197
4392
|
terminal_parse_failure("?")
|
4198
|
-
|
4393
|
+
r6 = nil
|
4199
4394
|
end
|
4200
|
-
s1 <<
|
4395
|
+
s1 << r6
|
4201
4396
|
end
|
4202
4397
|
end
|
4203
4398
|
if s1.last
|
@@ -4211,241 +4406,253 @@ module ScientificNameClean
|
|
4211
4406
|
if r1
|
4212
4407
|
r0 = r1
|
4213
4408
|
else
|
4214
|
-
|
4215
|
-
|
4216
|
-
|
4409
|
+
i7, s7 = index, []
|
4410
|
+
i8 = index
|
4411
|
+
if input.index(Regexp.new('[A-Z]'), index) == index
|
4412
|
+
r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4217
4413
|
@index += 1
|
4218
4414
|
else
|
4219
|
-
|
4415
|
+
r9 = nil
|
4220
4416
|
end
|
4221
|
-
|
4222
|
-
|
4223
|
-
|
4224
|
-
|
4417
|
+
if r9
|
4418
|
+
r8 = r9
|
4419
|
+
else
|
4420
|
+
r10 = _nt_cap_digraph
|
4421
|
+
if r10
|
4422
|
+
r8 = r10
|
4423
|
+
else
|
4424
|
+
self.index = i8
|
4425
|
+
r8 = nil
|
4426
|
+
end
|
4225
4427
|
end
|
4226
|
-
|
4227
|
-
|
4228
|
-
|
4229
|
-
|
4428
|
+
s7 << r8
|
4429
|
+
if r8
|
4430
|
+
r11 = _nt_latin_word
|
4431
|
+
s7 << r11
|
4432
|
+
end
|
4433
|
+
if s7.last
|
4434
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
4435
|
+
r7.extend(CapLatinWord2)
|
4436
|
+
r7.extend(CapLatinWord3)
|
4230
4437
|
else
|
4231
|
-
self.index =
|
4232
|
-
|
4438
|
+
self.index = i7
|
4439
|
+
r7 = nil
|
4233
4440
|
end
|
4234
|
-
if
|
4235
|
-
r0 =
|
4441
|
+
if r7
|
4442
|
+
r0 = r7
|
4236
4443
|
else
|
4237
|
-
|
4444
|
+
i12 = index
|
4238
4445
|
if input.index("Ca", index) == index
|
4239
|
-
|
4446
|
+
r13 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4240
4447
|
@index += 2
|
4241
4448
|
else
|
4242
4449
|
terminal_parse_failure("Ca")
|
4243
|
-
|
4450
|
+
r13 = nil
|
4244
4451
|
end
|
4245
|
-
if
|
4246
|
-
|
4247
|
-
|
4452
|
+
if r13
|
4453
|
+
r12 = r13
|
4454
|
+
r12.extend(CapLatinWord4)
|
4248
4455
|
else
|
4249
4456
|
if input.index("Ea", index) == index
|
4250
|
-
|
4457
|
+
r14 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4251
4458
|
@index += 2
|
4252
4459
|
else
|
4253
4460
|
terminal_parse_failure("Ea")
|
4254
|
-
|
4461
|
+
r14 = nil
|
4255
4462
|
end
|
4256
|
-
if
|
4257
|
-
|
4258
|
-
|
4463
|
+
if r14
|
4464
|
+
r12 = r14
|
4465
|
+
r12.extend(CapLatinWord4)
|
4259
4466
|
else
|
4260
4467
|
if input.index("Ge", index) == index
|
4261
|
-
|
4468
|
+
r15 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4262
4469
|
@index += 2
|
4263
4470
|
else
|
4264
4471
|
terminal_parse_failure("Ge")
|
4265
|
-
|
4472
|
+
r15 = nil
|
4266
4473
|
end
|
4267
|
-
if
|
4268
|
-
|
4269
|
-
|
4474
|
+
if r15
|
4475
|
+
r12 = r15
|
4476
|
+
r12.extend(CapLatinWord4)
|
4270
4477
|
else
|
4271
4478
|
if input.index("Ia", index) == index
|
4272
|
-
|
4479
|
+
r16 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4273
4480
|
@index += 2
|
4274
4481
|
else
|
4275
4482
|
terminal_parse_failure("Ia")
|
4276
|
-
|
4483
|
+
r16 = nil
|
4277
4484
|
end
|
4278
|
-
if
|
4279
|
-
|
4280
|
-
|
4485
|
+
if r16
|
4486
|
+
r12 = r16
|
4487
|
+
r12.extend(CapLatinWord4)
|
4281
4488
|
else
|
4282
4489
|
if input.index("Io", index) == index
|
4283
|
-
|
4490
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4284
4491
|
@index += 2
|
4285
4492
|
else
|
4286
4493
|
terminal_parse_failure("Io")
|
4287
|
-
|
4494
|
+
r17 = nil
|
4288
4495
|
end
|
4289
|
-
if
|
4290
|
-
|
4291
|
-
|
4496
|
+
if r17
|
4497
|
+
r12 = r17
|
4498
|
+
r12.extend(CapLatinWord4)
|
4292
4499
|
else
|
4293
4500
|
if input.index("Io", index) == index
|
4294
|
-
|
4501
|
+
r18 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4295
4502
|
@index += 2
|
4296
4503
|
else
|
4297
4504
|
terminal_parse_failure("Io")
|
4298
|
-
|
4505
|
+
r18 = nil
|
4299
4506
|
end
|
4300
|
-
if
|
4301
|
-
|
4302
|
-
|
4507
|
+
if r18
|
4508
|
+
r12 = r18
|
4509
|
+
r12.extend(CapLatinWord4)
|
4303
4510
|
else
|
4304
4511
|
if input.index("Ix", index) == index
|
4305
|
-
|
4512
|
+
r19 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4306
4513
|
@index += 2
|
4307
4514
|
else
|
4308
4515
|
terminal_parse_failure("Ix")
|
4309
|
-
|
4516
|
+
r19 = nil
|
4310
4517
|
end
|
4311
|
-
if
|
4312
|
-
|
4313
|
-
|
4518
|
+
if r19
|
4519
|
+
r12 = r19
|
4520
|
+
r12.extend(CapLatinWord4)
|
4314
4521
|
else
|
4315
4522
|
if input.index("Lo", index) == index
|
4316
|
-
|
4523
|
+
r20 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4317
4524
|
@index += 2
|
4318
4525
|
else
|
4319
4526
|
terminal_parse_failure("Lo")
|
4320
|
-
|
4527
|
+
r20 = nil
|
4321
4528
|
end
|
4322
|
-
if
|
4323
|
-
|
4324
|
-
|
4529
|
+
if r20
|
4530
|
+
r12 = r20
|
4531
|
+
r12.extend(CapLatinWord4)
|
4325
4532
|
else
|
4326
4533
|
if input.index("Oa", index) == index
|
4327
|
-
|
4534
|
+
r21 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4328
4535
|
@index += 2
|
4329
4536
|
else
|
4330
4537
|
terminal_parse_failure("Oa")
|
4331
|
-
|
4538
|
+
r21 = nil
|
4332
4539
|
end
|
4333
|
-
if
|
4334
|
-
|
4335
|
-
|
4540
|
+
if r21
|
4541
|
+
r12 = r21
|
4542
|
+
r12.extend(CapLatinWord4)
|
4336
4543
|
else
|
4337
4544
|
if input.index("Ra", index) == index
|
4338
|
-
|
4545
|
+
r22 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4339
4546
|
@index += 2
|
4340
4547
|
else
|
4341
4548
|
terminal_parse_failure("Ra")
|
4342
|
-
|
4549
|
+
r22 = nil
|
4343
4550
|
end
|
4344
|
-
if
|
4345
|
-
|
4346
|
-
|
4551
|
+
if r22
|
4552
|
+
r12 = r22
|
4553
|
+
r12.extend(CapLatinWord4)
|
4347
4554
|
else
|
4348
4555
|
if input.index("Ty", index) == index
|
4349
|
-
|
4556
|
+
r23 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4350
4557
|
@index += 2
|
4351
4558
|
else
|
4352
4559
|
terminal_parse_failure("Ty")
|
4353
|
-
|
4560
|
+
r23 = nil
|
4354
4561
|
end
|
4355
|
-
if
|
4356
|
-
|
4357
|
-
|
4562
|
+
if r23
|
4563
|
+
r12 = r23
|
4564
|
+
r12.extend(CapLatinWord4)
|
4358
4565
|
else
|
4359
4566
|
if input.index("Ua", index) == index
|
4360
|
-
|
4567
|
+
r24 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4361
4568
|
@index += 2
|
4362
4569
|
else
|
4363
4570
|
terminal_parse_failure("Ua")
|
4364
|
-
|
4571
|
+
r24 = nil
|
4365
4572
|
end
|
4366
|
-
if
|
4367
|
-
|
4368
|
-
|
4573
|
+
if r24
|
4574
|
+
r12 = r24
|
4575
|
+
r12.extend(CapLatinWord4)
|
4369
4576
|
else
|
4370
4577
|
if input.index("Aa", index) == index
|
4371
|
-
|
4578
|
+
r25 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4372
4579
|
@index += 2
|
4373
4580
|
else
|
4374
4581
|
terminal_parse_failure("Aa")
|
4375
|
-
|
4582
|
+
r25 = nil
|
4376
4583
|
end
|
4377
|
-
if
|
4378
|
-
|
4379
|
-
|
4584
|
+
if r25
|
4585
|
+
r12 = r25
|
4586
|
+
r12.extend(CapLatinWord4)
|
4380
4587
|
else
|
4381
4588
|
if input.index("Ja", index) == index
|
4382
|
-
|
4589
|
+
r26 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4383
4590
|
@index += 2
|
4384
4591
|
else
|
4385
4592
|
terminal_parse_failure("Ja")
|
4386
|
-
|
4593
|
+
r26 = nil
|
4387
4594
|
end
|
4388
|
-
if
|
4389
|
-
|
4390
|
-
|
4595
|
+
if r26
|
4596
|
+
r12 = r26
|
4597
|
+
r12.extend(CapLatinWord4)
|
4391
4598
|
else
|
4392
4599
|
if input.index("Zu", index) == index
|
4393
|
-
|
4600
|
+
r27 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4394
4601
|
@index += 2
|
4395
4602
|
else
|
4396
4603
|
terminal_parse_failure("Zu")
|
4397
|
-
|
4604
|
+
r27 = nil
|
4398
4605
|
end
|
4399
|
-
if
|
4400
|
-
|
4401
|
-
|
4606
|
+
if r27
|
4607
|
+
r12 = r27
|
4608
|
+
r12.extend(CapLatinWord4)
|
4402
4609
|
else
|
4403
4610
|
if input.index("La", index) == index
|
4404
|
-
|
4611
|
+
r28 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4405
4612
|
@index += 2
|
4406
4613
|
else
|
4407
4614
|
terminal_parse_failure("La")
|
4408
|
-
|
4615
|
+
r28 = nil
|
4409
4616
|
end
|
4410
|
-
if
|
4411
|
-
|
4412
|
-
|
4617
|
+
if r28
|
4618
|
+
r12 = r28
|
4619
|
+
r12.extend(CapLatinWord4)
|
4413
4620
|
else
|
4414
4621
|
if input.index("Qu", index) == index
|
4415
|
-
|
4622
|
+
r29 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4416
4623
|
@index += 2
|
4417
4624
|
else
|
4418
4625
|
terminal_parse_failure("Qu")
|
4419
|
-
|
4626
|
+
r29 = nil
|
4420
4627
|
end
|
4421
|
-
if
|
4422
|
-
|
4423
|
-
|
4628
|
+
if r29
|
4629
|
+
r12 = r29
|
4630
|
+
r12.extend(CapLatinWord4)
|
4424
4631
|
else
|
4425
4632
|
if input.index("As", index) == index
|
4426
|
-
|
4633
|
+
r30 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4427
4634
|
@index += 2
|
4428
4635
|
else
|
4429
4636
|
terminal_parse_failure("As")
|
4430
|
-
|
4637
|
+
r30 = nil
|
4431
4638
|
end
|
4432
|
-
if
|
4433
|
-
|
4434
|
-
|
4639
|
+
if r30
|
4640
|
+
r12 = r30
|
4641
|
+
r12.extend(CapLatinWord4)
|
4435
4642
|
else
|
4436
4643
|
if input.index("Ba", index) == index
|
4437
|
-
|
4644
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4438
4645
|
@index += 2
|
4439
4646
|
else
|
4440
4647
|
terminal_parse_failure("Ba")
|
4441
|
-
|
4648
|
+
r31 = nil
|
4442
4649
|
end
|
4443
|
-
if
|
4444
|
-
|
4445
|
-
|
4650
|
+
if r31
|
4651
|
+
r12 = r31
|
4652
|
+
r12.extend(CapLatinWord4)
|
4446
4653
|
else
|
4447
|
-
self.index =
|
4448
|
-
|
4654
|
+
self.index = i12
|
4655
|
+
r12 = nil
|
4449
4656
|
end
|
4450
4657
|
end
|
4451
4658
|
end
|
@@ -4465,8 +4672,8 @@ module ScientificNameClean
|
|
4465
4672
|
end
|
4466
4673
|
end
|
4467
4674
|
end
|
4468
|
-
if
|
4469
|
-
r0 =
|
4675
|
+
if r12
|
4676
|
+
r0 = r12
|
4470
4677
|
else
|
4471
4678
|
self.index = i0
|
4472
4679
|
r0 = nil
|
@@ -4727,6 +4934,59 @@ module ScientificNameClean
|
|
4727
4934
|
return r0
|
4728
4935
|
end
|
4729
4936
|
|
4937
|
+
module CapDigraph0
|
4938
|
+
def value
|
4939
|
+
'Ae'
|
4940
|
+
end
|
4941
|
+
end
|
4942
|
+
|
4943
|
+
module CapDigraph1
|
4944
|
+
def value
|
4945
|
+
'Oe'
|
4946
|
+
end
|
4947
|
+
end
|
4948
|
+
|
4949
|
+
def _nt_cap_digraph
|
4950
|
+
start_index = index
|
4951
|
+
if node_cache[:cap_digraph].has_key?(index)
|
4952
|
+
cached = node_cache[:cap_digraph][index]
|
4953
|
+
@index = cached.interval.end if cached
|
4954
|
+
return cached
|
4955
|
+
end
|
4956
|
+
|
4957
|
+
i0 = index
|
4958
|
+
if input.index("Æ", index) == index
|
4959
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4960
|
+
r1.extend(CapDigraph0)
|
4961
|
+
@index += 1
|
4962
|
+
else
|
4963
|
+
terminal_parse_failure("Æ")
|
4964
|
+
r1 = nil
|
4965
|
+
end
|
4966
|
+
if r1
|
4967
|
+
r0 = r1
|
4968
|
+
else
|
4969
|
+
if input.index("Œ", index) == index
|
4970
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4971
|
+
r2.extend(CapDigraph1)
|
4972
|
+
@index += 1
|
4973
|
+
else
|
4974
|
+
terminal_parse_failure("Œ")
|
4975
|
+
r2 = nil
|
4976
|
+
end
|
4977
|
+
if r2
|
4978
|
+
r0 = r2
|
4979
|
+
else
|
4980
|
+
self.index = i0
|
4981
|
+
r0 = nil
|
4982
|
+
end
|
4983
|
+
end
|
4984
|
+
|
4985
|
+
node_cache[:cap_digraph][start_index] = r0
|
4986
|
+
|
4987
|
+
return r0
|
4988
|
+
end
|
4989
|
+
|
4730
4990
|
module Digraph0
|
4731
4991
|
def value
|
4732
4992
|
'ae'
|
@@ -4748,21 +5008,23 @@ module ScientificNameClean
|
|
4748
5008
|
end
|
4749
5009
|
|
4750
5010
|
i0 = index
|
4751
|
-
if input.index(
|
5011
|
+
if input.index("æ", index) == index
|
4752
5012
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4753
5013
|
r1.extend(Digraph0)
|
4754
5014
|
@index += 1
|
4755
5015
|
else
|
5016
|
+
terminal_parse_failure("æ")
|
4756
5017
|
r1 = nil
|
4757
5018
|
end
|
4758
5019
|
if r1
|
4759
5020
|
r0 = r1
|
4760
5021
|
else
|
4761
|
-
if input.index(
|
5022
|
+
if input.index("œ", index) == index
|
4762
5023
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4763
5024
|
r2.extend(Digraph1)
|
4764
5025
|
@index += 1
|
4765
5026
|
else
|
5027
|
+
terminal_parse_failure("œ")
|
4766
5028
|
r2 = nil
|
4767
5029
|
end
|
4768
5030
|
if r2
|
@@ -4844,6 +5106,11 @@ module ScientificNameClean
|
|
4844
5106
|
def value
|
4845
5107
|
text_value.strip
|
4846
5108
|
end
|
5109
|
+
|
5110
|
+
def pos
|
5111
|
+
{interval.begin => ['year', interval.end]}
|
5112
|
+
end
|
5113
|
+
|
4847
5114
|
def details
|
4848
5115
|
{:year => value}
|
4849
5116
|
end
|
@@ -4907,6 +5174,11 @@ module ScientificNameClean
|
|
4907
5174
|
def value
|
4908
5175
|
a.text_value
|
4909
5176
|
end
|
5177
|
+
|
5178
|
+
def pos
|
5179
|
+
{interval.begin => ['year', interval.end]}
|
5180
|
+
end
|
5181
|
+
|
4910
5182
|
def details
|
4911
5183
|
{:year => value}
|
4912
5184
|
end
|