nexus_parser 1.2.0 → 1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/lib/nexus_parser/lexer.rb +0 -10
- data/lib/nexus_parser/parser.rb +146 -77
- data/lib/nexus_parser/tokens.rb +87 -84
- data/lib/nexus_parser/version.rb +1 -1
- data/lib/nexus_parser.rb +41 -14
- data/test/test_nexus_parser.rb +371 -26
- metadata +3 -3
data/test/test_nexus_parser.rb
CHANGED
@@ -35,18 +35,18 @@ class Test_Lexer < Test::Unit::TestCase
|
|
35
35
|
def test_lexer
|
36
36
|
lexer = NexusParser::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
|
37
37
|
assert lexer.pop(NexusParser::Tokens::LBracket)
|
38
|
-
assert id = lexer.pop(NexusParser::Tokens::ID)
|
38
|
+
assert id = lexer.pop(NexusParser::Tokens::Label)
|
39
39
|
assert_equal(id.value, "foo")
|
40
40
|
assert lexer.pop(NexusParser::Tokens::RBracket)
|
41
41
|
assert lexer.pop(NexusParser::Tokens::BeginBlk)
|
42
42
|
assert lexer.pop(NexusParser::Tokens::TaxaBlk)
|
43
|
-
assert foo = lexer.pop(NexusParser::Tokens::ID)
|
43
|
+
assert foo = lexer.pop(NexusParser::Tokens::Label)
|
44
44
|
assert_equal("BLORF", foo.value) # truncating whitespace
|
45
45
|
assert lexer.pop(NexusParser::Tokens::BlkEnd)
|
46
46
|
|
47
47
|
lexer2 = NexusParser::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
|
48
48
|
assert lexer2.pop(NexusParser::Tokens::LBracket)
|
49
|
-
assert id = lexer2.pop(NexusParser::Tokens::ID)
|
49
|
+
assert id = lexer2.pop(NexusParser::Tokens::Label)
|
50
50
|
assert_equal(id.value, "foo")
|
51
51
|
assert lexer2.pop(NexusParser::Tokens::RBracket)
|
52
52
|
assert lexer2.pop(NexusParser::Tokens::BeginBlk)
|
@@ -56,46 +56,52 @@ class Test_Lexer < Test::Unit::TestCase
|
|
56
56
|
assert lexer2.pop(NexusParser::Tokens::LParen)
|
57
57
|
assert lexer2.pop(NexusParser::Tokens::RParen)
|
58
58
|
|
59
|
+
lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
|
60
|
+
assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
|
61
|
+
assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
|
62
|
+
assert lexer2a.pop(NexusParser::Tokens::LBracket)
|
63
|
+
assert lexer2a.pop(NexusParser::Tokens::RBracket)
|
64
|
+
|
59
65
|
lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
|
60
66
|
assert lexer3.pop(NexusParser::Tokens::LBracket)
|
61
|
-
assert id = lexer3.pop(NexusParser::Tokens::ID)
|
67
|
+
assert id = lexer3.pop(NexusParser::Tokens::Label)
|
62
68
|
assert_equal(id.value, "foo")
|
63
69
|
assert lexer3.pop(NexusParser::Tokens::RBracket)
|
64
70
|
assert lexer3.pop(NexusParser::Tokens::BeginBlk)
|
65
71
|
assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
|
66
|
-
assert foo = lexer3.pop(NexusParser::Tokens::ID)
|
72
|
+
assert foo = lexer3.pop(NexusParser::Tokens::Label)
|
67
73
|
assert_equal("BLORF", foo.value)
|
68
74
|
assert lexer3.pop(NexusParser::Tokens::BlkEnd)
|
69
75
|
|
70
76
|
lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
|
71
77
|
assert lexer4.pop(NexusParser::Tokens::BeginBlk)
|
72
78
|
assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
|
73
|
-
assert foo = lexer4.pop(NexusParser::Tokens::
|
74
|
-
assert_equal(123123123, foo.value)
|
79
|
+
assert foo = lexer4.pop(NexusParser::Tokens::PositiveInteger)
|
80
|
+
assert_equal('123123123', foo.value)
|
75
81
|
assert lexer4.pop(NexusParser::Tokens::BlkEnd)
|
76
82
|
|
77
83
|
lexer5 = NexusParser::Lexer.new("(0,1)")
|
78
84
|
assert lexer5.pop(NexusParser::Tokens::LParen)
|
79
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
80
|
-
assert_equal(0, foo.value)
|
85
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
86
|
+
assert_equal('0', foo.value)
|
81
87
|
assert lexer5.pop(NexusParser::Tokens::Comma)
|
82
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
83
|
-
assert_equal(1, foo.value)
|
88
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
89
|
+
assert_equal('1', foo.value)
|
84
90
|
assert lexer5.pop(NexusParser::Tokens::RParen)
|
85
91
|
|
86
92
|
lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
|
87
93
|
assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
|
88
94
|
assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
|
89
95
|
|
90
|
-
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{
|
96
|
+
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{345}(0)(1 a)\n")
|
91
97
|
assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
|
92
98
|
assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
|
93
99
|
|
94
|
-
lexer6b = NexusParser::Lexer.new(" 201{0 1}
|
100
|
+
lexer6b = NexusParser::Lexer.new(" 201(01){0 1}0100\x0A") # *nix line ending
|
95
101
|
assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
|
96
102
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
97
103
|
|
98
|
-
lexer6c = NexusParser::Lexer.new(" 201{0 1}{
|
104
|
+
lexer6c = NexusParser::Lexer.new(" 201{0 1}{01}0100\x0D\x0A") # * dos line ending
|
99
105
|
assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
|
100
106
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
101
107
|
|
@@ -120,7 +126,41 @@ class Test_Lexer < Test::Unit::TestCase
|
|
120
126
|
def test_row_vec
|
121
127
|
lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
|
122
128
|
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
123
|
-
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "
|
129
|
+
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "B", "D", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_ungrouped_spaces_in_row_vec
|
133
|
+
lexer = NexusParser::Lexer.new("- A 12(BC) ? \n")
|
134
|
+
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
135
|
+
assert_equal(['-', 'A', '1', '2', ['B', 'C'], '?'], foo.value)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_mismatched_parens_row_vec
|
139
|
+
lexer = NexusParser::Lexer.new("01(12(13\n")
|
140
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
141
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
142
|
+
}
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_mismatched_groupers_row_vec
|
146
|
+
lexer = NexusParser::Lexer.new("01(12}13\n")
|
147
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
148
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
149
|
+
}
|
150
|
+
end
|
151
|
+
|
152
|
+
def test_nested_parens_row_vec
|
153
|
+
lexer = NexusParser::Lexer.new("01(12(34))13\n")
|
154
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
155
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
156
|
+
}
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_unclosed_parens_row_vec
|
160
|
+
lexer = NexusParser::Lexer.new("01(123413\n")
|
161
|
+
assert_raise_with_message(NexusParser::ParseError, /Unclosed/) {
|
162
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
163
|
+
}
|
124
164
|
end
|
125
165
|
|
126
166
|
def test_punctuation
|
@@ -149,7 +189,7 @@ class Test_Lexer < Test::Unit::TestCase
|
|
149
189
|
def test_EndBlk
|
150
190
|
lexer = NexusParser::Lexer.new(" \n\n End ;")
|
151
191
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
152
|
-
lexer = NexusParser::Lexer.new("\n\
|
192
|
+
lexer = NexusParser::Lexer.new("\n\nEndblock;")
|
153
193
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
154
194
|
|
155
195
|
lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
|
@@ -401,13 +441,13 @@ class Test_Lexer < Test::Unit::TestCase
|
|
401
441
|
CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
|
402
442
|
|
403
443
|
|
404
|
-
|
444
|
+
ENDBLOCK;
|
405
445
|
|
406
446
|
BEGIN some other block;")
|
407
447
|
|
408
448
|
assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
|
409
449
|
assert_equal 'LABELS', foo.value.slice(0,6)
|
410
|
-
assert_equal '
|
450
|
+
assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
|
411
451
|
end
|
412
452
|
|
413
453
|
def test_SetsBlk
|
@@ -422,11 +462,6 @@ class Test_Lexer < Test::Unit::TestCase
|
|
422
462
|
assert_equal 'SETS', foo.value.slice(0,4)
|
423
463
|
assert_equal 'END;', foo.value.slice(-4,4)
|
424
464
|
end
|
425
|
-
|
426
|
-
def test_lexer_errors
|
427
|
-
lexer = NexusParser::Lexer.new("*&")
|
428
|
-
assert_raise(NexusParser::ParseError) {lexer.peek(NexusParser::Tokens::ID)}
|
429
|
-
end
|
430
465
|
end
|
431
466
|
|
432
467
|
|
@@ -513,8 +548,6 @@ class Test_Parser < Test::Unit::TestCase
|
|
513
548
|
assert_equal "Tetragnatha", foo.taxa[9].name
|
514
549
|
end
|
515
550
|
|
516
|
-
|
517
|
-
|
518
551
|
def test_parse_characters_blk
|
519
552
|
input= "
|
520
553
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
@@ -570,6 +603,30 @@ class Test_Parser < Test::Unit::TestCase
|
|
570
603
|
assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
|
571
604
|
end
|
572
605
|
|
606
|
+
def test_matrix_with_short_row
|
607
|
+
input= "
|
608
|
+
DIMENSIONS NCHAR=2;
|
609
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
610
|
+
CHARSTATELABELS
|
611
|
+
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger;
|
612
|
+
MATRIX
|
613
|
+
Dictyna 0?
|
614
|
+
Uloborus ??
|
615
|
+
Deinopis 0
|
616
|
+
;
|
617
|
+
END;"
|
618
|
+
|
619
|
+
builder = NexusParser::Builder.new
|
620
|
+
@lexer = NexusParser::Lexer.new(input)
|
621
|
+
|
622
|
+
# stub the taxa, they would otherwise get added in dimensions or taxa block
|
623
|
+
(0..2).each{|i| builder.stub_taxon}
|
624
|
+
|
625
|
+
assert_raise_with_message(NexusParser::ParseError, /too short/) {
|
626
|
+
NexusParser::Parser.new(@lexer, builder).parse_characters_blk
|
627
|
+
}
|
628
|
+
end
|
629
|
+
|
573
630
|
def test_characters_block_without_IDs_or_title
|
574
631
|
input= "
|
575
632
|
DIMENSIONS NCHAR=10;
|
@@ -589,7 +646,7 @@ class Test_Parser < Test::Unit::TestCase
|
|
589
646
|
Tetragnatha 0?01011011
|
590
647
|
|
591
648
|
;
|
592
|
-
|
649
|
+
ENDBLOCK;"
|
593
650
|
|
594
651
|
builder = NexusParser::Builder.new
|
595
652
|
@lexer = NexusParser::Lexer.new(input)
|
@@ -619,6 +676,55 @@ class Test_Parser < Test::Unit::TestCase
|
|
619
676
|
assert_equal 10, foo.characters.size
|
620
677
|
end
|
621
678
|
|
679
|
+
def test_characters_charlabels_statelabels_block
|
680
|
+
input= "
|
681
|
+
DIMENSIONS NCHAR=4;
|
682
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
683
|
+
CHARLABELS
|
684
|
+
Tibia_II
|
685
|
+
TII_macrosetae
|
686
|
+
'Femoral tuber'
|
687
|
+
_
|
688
|
+
;
|
689
|
+
STATELABELS
|
690
|
+
1 norm modified,
|
691
|
+
3 3 3.5 4,
|
692
|
+
4 pres
|
693
|
+
;
|
694
|
+
MATRIX
|
695
|
+
Dictyna -?1(01)
|
696
|
+
Uloborus 0321
|
697
|
+
;
|
698
|
+
ENDBLOCK;"
|
699
|
+
|
700
|
+
builder = NexusParser::Builder.new
|
701
|
+
lexer = NexusParser::Lexer.new(input)
|
702
|
+
|
703
|
+
(0..3).each{|i| builder.stub_taxon}
|
704
|
+
|
705
|
+
NexusParser::Parser.new(lexer,builder).parse_characters_blk
|
706
|
+
foo = builder.nexus_file
|
707
|
+
|
708
|
+
assert_equal 4, foo.characters.size
|
709
|
+
assert_equal "Femoral tuber", foo.characters[2].name
|
710
|
+
assert_equal "Undefined", foo.characters[3].name
|
711
|
+
|
712
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
713
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
714
|
+
|
715
|
+
assert_equal "", foo.characters[1].states["3"].name
|
716
|
+
|
717
|
+
assert_equal ["3", "3.5", "4"], foo.characters[2].states.keys.collect{|s| foo.characters[2].states[s].name}.sort
|
718
|
+
|
719
|
+
assert_equal "", foo.characters[1].states["3"].name
|
720
|
+
|
721
|
+
assert_equal ["-"], foo.codings[0][0].states
|
722
|
+
assert_equal ["?"], foo.codings[0][1].states
|
723
|
+
assert_equal ["0", "1"], foo.codings[0][3].states
|
724
|
+
|
725
|
+
assert_equal ["3"], foo.codings[1][1].states
|
726
|
+
end
|
727
|
+
|
622
728
|
def test_codings
|
623
729
|
foo = parse_nexus_file(@nf)
|
624
730
|
assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
|
@@ -655,6 +761,82 @@ class Test_Parser < Test::Unit::TestCase
|
|
655
761
|
# add test that nothing is left in lexer
|
656
762
|
end
|
657
763
|
|
764
|
+
def test_parse_format_respect_case
|
765
|
+
input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
|
766
|
+
builder = NexusParser::Builder.new
|
767
|
+
lexer = NexusParser::Lexer.new(input)
|
768
|
+
|
769
|
+
NexusParser::Parser.new(lexer,builder).parse_format
|
770
|
+
foo = builder.nexus_file
|
771
|
+
|
772
|
+
assert_equal "STANDARD", foo.vars[:datatype]
|
773
|
+
assert_equal "-", foo.vars[:gap]
|
774
|
+
assert_equal "?", foo.vars[:missing]
|
775
|
+
assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
|
776
|
+
end
|
777
|
+
|
778
|
+
# https://github.com/mjy/nexus_parser/issues/9
|
779
|
+
def test_three_both_numeric_and_label_state_names_in_a_row
|
780
|
+
input =" CHARSTATELABELS
|
781
|
+
1 'Metatarsal trichobothria (CodAra.29)' / 3 9 27 asdf;
|
782
|
+
Matrix
|
783
|
+
fooo 01 more stuff here that should not be hit"
|
784
|
+
|
785
|
+
builder = NexusParser::Builder.new
|
786
|
+
lexer = NexusParser::Lexer.new(input)
|
787
|
+
|
788
|
+
builder.stub_chr()
|
789
|
+
|
790
|
+
NexusParser::Parser.new(lexer, builder).parse_chr_state_labels
|
791
|
+
|
792
|
+
foo = builder.nexus_file
|
793
|
+
|
794
|
+
assert_equal "3", foo.characters[0].states['0'].name
|
795
|
+
assert_equal "9", foo.characters[0].states['1'].name
|
796
|
+
assert_equal "27", foo.characters[0].states['2'].name
|
797
|
+
assert_equal "asdf", foo.characters[0].states['3'].name
|
798
|
+
end
|
799
|
+
|
800
|
+
def test_non_label_character_name_character_labels
|
801
|
+
input = 'CHARSTATELABELS
|
802
|
+
1 (intentionally_blank) /,
|
803
|
+
2 /,
|
804
|
+
3 %_coverage /,
|
805
|
+
4 #_of_widgets /,
|
806
|
+
5 !endangered! /,
|
807
|
+
6 @the_front /,
|
808
|
+
7 =antennae,
|
809
|
+
8 `a_=_2` /,
|
810
|
+
9 -35_or-36 ,
|
811
|
+
10 27_or_less /,
|
812
|
+
11 fine_not_fine /,
|
813
|
+
12 3,
|
814
|
+
;'
|
815
|
+
|
816
|
+
builder = NexusParser::Builder.new
|
817
|
+
lexer = NexusParser::Lexer.new(input)
|
818
|
+
|
819
|
+
(0..11).each{builder.stub_chr()}
|
820
|
+
|
821
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
822
|
+
|
823
|
+
foo = builder.nexus_file
|
824
|
+
|
825
|
+
assert_equal 12, foo.characters.size
|
826
|
+
assert_equal "(intentionally_blank)", foo.characters[0].name
|
827
|
+
assert_equal "Undefined", foo.characters[1].name
|
828
|
+
assert_equal "%_coverage", foo.characters[2].name
|
829
|
+
assert_equal "#_of_widgets", foo.characters[3].name
|
830
|
+
assert_equal "!endangered!", foo.characters[4].name
|
831
|
+
assert_equal "@the_front", foo.characters[5].name
|
832
|
+
assert_equal "=antennae", foo.characters[6].name # =3
|
833
|
+
assert_equal "`a_=_2`", foo.characters[7].name
|
834
|
+
assert_equal "-35_or-36", foo.characters[8].name
|
835
|
+
assert_equal "27_or_less", foo.characters[9].name
|
836
|
+
assert_equal "fine_not_fine", foo.characters[10].name
|
837
|
+
assert_equal "3", foo.characters[11].name
|
838
|
+
end
|
839
|
+
|
658
840
|
def test_parse_chr_state_labels
|
659
841
|
input =" CHARSTATELABELS
|
660
842
|
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
@@ -736,6 +918,169 @@ class Test_Parser < Test::Unit::TestCase
|
|
736
918
|
|
737
919
|
end
|
738
920
|
|
921
|
+
def test_parse_chr_labels
|
922
|
+
input =" CHARLABELS
|
923
|
+
_
|
924
|
+
'Maxillary teeth'
|
925
|
+
as_df
|
926
|
+
'Highest number of maxillary teeth (or alveoli):';
|
927
|
+
STATELABELS
|
928
|
+
1 more more more,"
|
929
|
+
|
930
|
+
builder = NexusParser::Builder.new
|
931
|
+
lexer = NexusParser::Lexer.new(input)
|
932
|
+
|
933
|
+
(0..3).each{builder.stub_chr()}
|
934
|
+
|
935
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_labels
|
936
|
+
|
937
|
+
foo = builder.nexus_file
|
938
|
+
assert_equal 4, foo.characters.size
|
939
|
+
assert_equal 'Undefined', foo.characters[0].name
|
940
|
+
assert_equal 'Maxillary teeth', foo.characters[1].name
|
941
|
+
assert_equal 'as_df', foo.characters[2].name
|
942
|
+
assert_equal 'Highest number of maxillary teeth (or alveoli):', foo.characters[3].name
|
943
|
+
end
|
944
|
+
|
945
|
+
def test_parse_state_labels
|
946
|
+
input =" STATELABELS
|
947
|
+
1 norm modified,
|
948
|
+
3,
|
949
|
+
4 pres
|
950
|
+
;
|
951
|
+
CHARLABELS;
|
952
|
+
"
|
953
|
+
|
954
|
+
builder = NexusParser::Builder.new
|
955
|
+
lexer = NexusParser::Lexer.new(input)
|
956
|
+
|
957
|
+
(0..3).each{builder.stub_chr()}
|
958
|
+
|
959
|
+
NexusParser::Parser.new(lexer,builder).parse_state_labels
|
960
|
+
|
961
|
+
foo = builder.nexus_file
|
962
|
+
assert_equal 4, foo.characters.size
|
963
|
+
|
964
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
965
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
966
|
+
|
967
|
+
assert_empty foo.characters[1].states
|
968
|
+
|
969
|
+
assert_empty foo.characters[2].states
|
970
|
+
|
971
|
+
assert_equal "pres", foo.characters[3].states["0"].name
|
972
|
+
end
|
973
|
+
|
974
|
+
def test_non_label_character_state_character_labels
|
975
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
976
|
+
.5
|
977
|
+
.1.2_form
|
978
|
+
idsimple
|
979
|
+
%_of_length_less_than_10
|
980
|
+
!poisonous!
|
981
|
+
#_is_3_or_4
|
982
|
+
(leave_as_is)
|
983
|
+
@12_o_clock
|
984
|
+
>2
|
985
|
+
~equal
|
986
|
+
=9
|
987
|
+
;'
|
988
|
+
|
989
|
+
builder = NexusParser::Builder.new
|
990
|
+
lexer = NexusParser::Lexer.new(input)
|
991
|
+
|
992
|
+
builder.stub_chr()
|
993
|
+
|
994
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
995
|
+
|
996
|
+
foo = builder.nexus_file
|
997
|
+
|
998
|
+
assert_equal ".5", foo.characters[0].states["0"].name
|
999
|
+
assert_equal ".1.2_form", foo.characters[0].states["1"].name
|
1000
|
+
assert_equal "idsimple", foo.characters[0].states["2"].name
|
1001
|
+
assert_equal "%_of_length_less_than_10", foo.characters[0].states["3"].name
|
1002
|
+
assert_equal "!poisonous!", foo.characters[0].states["4"].name
|
1003
|
+
assert_equal "#_is_3_or_4", foo.characters[0].states["5"].name
|
1004
|
+
assert_equal "(leave_as_is)", foo.characters[0].states["6"].name
|
1005
|
+
assert_equal "@12_o_clock", foo.characters[0].states["7"].name
|
1006
|
+
assert_equal ">2", foo.characters[0].states["8"].name
|
1007
|
+
assert_equal "~equal", foo.characters[0].states["9"].name
|
1008
|
+
assert_equal "=9", foo.characters[0].states["10"].name
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
def test_arbitrary_quote_and_quotelike_character_state_labels
|
1012
|
+
# We could tighten up our handling of accidentally unclosed quotes, but
|
1013
|
+
# there's pretty much no way to recover in general, so we're not testing
|
1014
|
+
# them here.
|
1015
|
+
# Things like ""asdf" " failing is a known issue (maybe not solvable with
|
1016
|
+
# regular expressions?).
|
1017
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1018
|
+
"asd, \'f\'"
|
1019
|
+
""a\'sdf "
|
1020
|
+
\' /as"df/\'
|
1021
|
+
\'asdf;\'
|
1022
|
+
""as, df""
|
1023
|
+
;'
|
1024
|
+
|
1025
|
+
builder = NexusParser::Builder.new
|
1026
|
+
lexer = NexusParser::Lexer.new(input)
|
1027
|
+
|
1028
|
+
builder.stub_chr()
|
1029
|
+
|
1030
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1031
|
+
|
1032
|
+
foo = builder.nexus_file
|
1033
|
+
|
1034
|
+
assert_equal 'asd, \'f\'', foo.characters[0].states["0"].name
|
1035
|
+
assert_equal '"a\'sdf', foo.characters[0].states["1"].name
|
1036
|
+
assert_equal '/as"df/', foo.characters[0].states["2"].name
|
1037
|
+
assert_equal 'asdf;', foo.characters[0].states["3"].name
|
1038
|
+
assert_equal '"as, df"', foo.characters[0].states["4"].name
|
1039
|
+
end
|
1040
|
+
|
1041
|
+
|
1042
|
+
def test_number_label_chr_state_labels
|
1043
|
+
# Character state names that start with numbers
|
1044
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1045
|
+
123abc
|
1046
|
+
-1.23abc
|
1047
|
+
-3e-3abc
|
1048
|
+
25%_or_less_than
|
1049
|
+
;'
|
1050
|
+
|
1051
|
+
builder = NexusParser::Builder.new
|
1052
|
+
lexer = NexusParser::Lexer.new(input)
|
1053
|
+
|
1054
|
+
(0..3).each{builder.stub_chr()}
|
1055
|
+
|
1056
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1057
|
+
|
1058
|
+
foo = builder.nexus_file
|
1059
|
+
|
1060
|
+
assert_equal "123abc", foo.characters[0].states["0"].name
|
1061
|
+
assert_equal "-1.23abc", foo.characters[0].states["1"].name
|
1062
|
+
assert_equal "-3e-3abc", foo.characters[0].states["2"].name
|
1063
|
+
assert_equal "25%_or_less_than", foo.characters[0].states["3"].name
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
def test_value_pair_label_chr_state_labels
|
1067
|
+
# Character state names that are ValuePairs
|
1068
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1069
|
+
234=(a_b_c)
|
1070
|
+
;'
|
1071
|
+
|
1072
|
+
builder = NexusParser::Builder.new
|
1073
|
+
lexer = NexusParser::Lexer.new(input)
|
1074
|
+
|
1075
|
+
builder.stub_chr()
|
1076
|
+
|
1077
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1078
|
+
|
1079
|
+
foo = builder.nexus_file
|
1080
|
+
|
1081
|
+
assert_equal '234=(a_b_c)', foo.characters[0].states["0"].name
|
1082
|
+
end
|
1083
|
+
|
739
1084
|
def DONT_test_parse_really_long_string_of_chr_state_labels
|
740
1085
|
input =" CHARSTATELABELS
|
741
1086
|
1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 
60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 
67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 
54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 
48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 
38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 
68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nexus_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mjy
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
128
|
- !ruby/object:Gem::Version
|
129
129
|
version: '0'
|
130
130
|
requirements: []
|
131
|
-
rubygems_version: 3.5.
|
131
|
+
rubygems_version: 3.5.9
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: A Nexus file format (phylogenetic inference) parser in Ruby.
|