nexus_parser 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/lib/nexus_parser/lexer.rb +0 -10
- data/lib/nexus_parser/parser.rb +146 -77
- data/lib/nexus_parser/tokens.rb +87 -84
- data/lib/nexus_parser/version.rb +1 -1
- data/lib/nexus_parser.rb +41 -14
- data/test/test_nexus_parser.rb +371 -26
- metadata +3 -3
data/test/test_nexus_parser.rb
CHANGED
@@ -35,18 +35,18 @@ class Test_Lexer < Test::Unit::TestCase
|
|
35
35
|
def test_lexer
|
36
36
|
lexer = NexusParser::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
|
37
37
|
assert lexer.pop(NexusParser::Tokens::LBracket)
|
38
|
-
assert id = lexer.pop(NexusParser::Tokens::
|
38
|
+
assert id = lexer.pop(NexusParser::Tokens::Label)
|
39
39
|
assert_equal(id.value, "foo")
|
40
40
|
assert lexer.pop(NexusParser::Tokens::RBracket)
|
41
41
|
assert lexer.pop(NexusParser::Tokens::BeginBlk)
|
42
42
|
assert lexer.pop(NexusParser::Tokens::TaxaBlk)
|
43
|
-
assert foo = lexer.pop(NexusParser::Tokens::
|
43
|
+
assert foo = lexer.pop(NexusParser::Tokens::Label)
|
44
44
|
assert_equal("BLORF", foo.value) # truncating whitespace
|
45
45
|
assert lexer.pop(NexusParser::Tokens::BlkEnd)
|
46
46
|
|
47
47
|
lexer2 = NexusParser::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
|
48
48
|
assert lexer2.pop(NexusParser::Tokens::LBracket)
|
49
|
-
assert id = lexer2.pop(NexusParser::Tokens::
|
49
|
+
assert id = lexer2.pop(NexusParser::Tokens::Label)
|
50
50
|
assert_equal(id.value, "foo")
|
51
51
|
assert lexer2.pop(NexusParser::Tokens::RBracket)
|
52
52
|
assert lexer2.pop(NexusParser::Tokens::BeginBlk)
|
@@ -56,46 +56,52 @@ class Test_Lexer < Test::Unit::TestCase
|
|
56
56
|
assert lexer2.pop(NexusParser::Tokens::LParen)
|
57
57
|
assert lexer2.pop(NexusParser::Tokens::RParen)
|
58
58
|
|
59
|
+
lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
|
60
|
+
assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
|
61
|
+
assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
|
62
|
+
assert lexer2a.pop(NexusParser::Tokens::LBracket)
|
63
|
+
assert lexer2a.pop(NexusParser::Tokens::RBracket)
|
64
|
+
|
59
65
|
lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
|
60
66
|
assert lexer3.pop(NexusParser::Tokens::LBracket)
|
61
|
-
assert id = lexer3.pop(NexusParser::Tokens::
|
67
|
+
assert id = lexer3.pop(NexusParser::Tokens::Label)
|
62
68
|
assert_equal(id.value, "foo")
|
63
69
|
assert lexer3.pop(NexusParser::Tokens::RBracket)
|
64
70
|
assert lexer3.pop(NexusParser::Tokens::BeginBlk)
|
65
71
|
assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
|
66
|
-
assert foo = lexer3.pop(NexusParser::Tokens::
|
72
|
+
assert foo = lexer3.pop(NexusParser::Tokens::Label)
|
67
73
|
assert_equal("BLORF", foo.value)
|
68
74
|
assert lexer3.pop(NexusParser::Tokens::BlkEnd)
|
69
75
|
|
70
76
|
lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
|
71
77
|
assert lexer4.pop(NexusParser::Tokens::BeginBlk)
|
72
78
|
assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
|
73
|
-
assert foo = lexer4.pop(NexusParser::Tokens::
|
74
|
-
assert_equal(123123123, foo.value)
|
79
|
+
assert foo = lexer4.pop(NexusParser::Tokens::PositiveInteger)
|
80
|
+
assert_equal('123123123', foo.value)
|
75
81
|
assert lexer4.pop(NexusParser::Tokens::BlkEnd)
|
76
82
|
|
77
83
|
lexer5 = NexusParser::Lexer.new("(0,1)")
|
78
84
|
assert lexer5.pop(NexusParser::Tokens::LParen)
|
79
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
80
|
-
assert_equal(0, foo.value)
|
85
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
86
|
+
assert_equal('0', foo.value)
|
81
87
|
assert lexer5.pop(NexusParser::Tokens::Comma)
|
82
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
83
|
-
assert_equal(1, foo.value)
|
88
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
89
|
+
assert_equal('1', foo.value)
|
84
90
|
assert lexer5.pop(NexusParser::Tokens::RParen)
|
85
91
|
|
86
92
|
lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
|
87
93
|
assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
|
88
94
|
assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
|
89
95
|
|
90
|
-
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{
|
96
|
+
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{345}(0)(1 a)\n")
|
91
97
|
assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
|
92
98
|
assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
|
93
99
|
|
94
|
-
lexer6b = NexusParser::Lexer.new(" 201{0 1}
|
100
|
+
lexer6b = NexusParser::Lexer.new(" 201(01){0 1}0100\x0A") # *nix line ending
|
95
101
|
assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
|
96
102
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
97
103
|
|
98
|
-
lexer6c = NexusParser::Lexer.new(" 201{0 1}{
|
104
|
+
lexer6c = NexusParser::Lexer.new(" 201{0 1}{01}0100\x0D\x0A") # * dos line ending
|
99
105
|
assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
|
100
106
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
101
107
|
|
@@ -120,7 +126,41 @@ class Test_Lexer < Test::Unit::TestCase
|
|
120
126
|
def test_row_vec
|
121
127
|
lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
|
122
128
|
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
123
|
-
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "
|
129
|
+
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "B", "D", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_ungrouped_spaces_in_row_vec
|
133
|
+
lexer = NexusParser::Lexer.new("- A 12(BC) ? \n")
|
134
|
+
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
135
|
+
assert_equal(['-', 'A', '1', '2', ['B', 'C'], '?'], foo.value)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_mismatched_parens_row_vec
|
139
|
+
lexer = NexusParser::Lexer.new("01(12(13\n")
|
140
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
141
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
142
|
+
}
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_mismatched_groupers_row_vec
|
146
|
+
lexer = NexusParser::Lexer.new("01(12}13\n")
|
147
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
148
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
149
|
+
}
|
150
|
+
end
|
151
|
+
|
152
|
+
def test_nested_parens_row_vec
|
153
|
+
lexer = NexusParser::Lexer.new("01(12(34))13\n")
|
154
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
155
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
156
|
+
}
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_unclosed_parens_row_vec
|
160
|
+
lexer = NexusParser::Lexer.new("01(123413\n")
|
161
|
+
assert_raise_with_message(NexusParser::ParseError, /Unclosed/) {
|
162
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
163
|
+
}
|
124
164
|
end
|
125
165
|
|
126
166
|
def test_punctuation
|
@@ -149,7 +189,7 @@ class Test_Lexer < Test::Unit::TestCase
|
|
149
189
|
def test_EndBlk
|
150
190
|
lexer = NexusParser::Lexer.new(" \n\n End ;")
|
151
191
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
152
|
-
lexer = NexusParser::Lexer.new("\n\
|
192
|
+
lexer = NexusParser::Lexer.new("\n\nEndblock;")
|
153
193
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
154
194
|
|
155
195
|
lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
|
@@ -401,13 +441,13 @@ class Test_Lexer < Test::Unit::TestCase
|
|
401
441
|
CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
|
402
442
|
|
403
443
|
|
404
|
-
|
444
|
+
ENDBLOCK;
|
405
445
|
|
406
446
|
BEGIN some other block;")
|
407
447
|
|
408
448
|
assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
|
409
449
|
assert_equal 'LABELS', foo.value.slice(0,6)
|
410
|
-
assert_equal '
|
450
|
+
assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
|
411
451
|
end
|
412
452
|
|
413
453
|
def test_SetsBlk
|
@@ -422,11 +462,6 @@ class Test_Lexer < Test::Unit::TestCase
|
|
422
462
|
assert_equal 'SETS', foo.value.slice(0,4)
|
423
463
|
assert_equal 'END;', foo.value.slice(-4,4)
|
424
464
|
end
|
425
|
-
|
426
|
-
def test_lexer_errors
|
427
|
-
lexer = NexusParser::Lexer.new("*&")
|
428
|
-
assert_raise(NexusParser::ParseError) {lexer.peek(NexusParser::Tokens::ID)}
|
429
|
-
end
|
430
465
|
end
|
431
466
|
|
432
467
|
|
@@ -513,8 +548,6 @@ class Test_Parser < Test::Unit::TestCase
|
|
513
548
|
assert_equal "Tetragnatha", foo.taxa[9].name
|
514
549
|
end
|
515
550
|
|
516
|
-
|
517
|
-
|
518
551
|
def test_parse_characters_blk
|
519
552
|
input= "
|
520
553
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
@@ -570,6 +603,30 @@ class Test_Parser < Test::Unit::TestCase
|
|
570
603
|
assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
|
571
604
|
end
|
572
605
|
|
606
|
+
def test_matrix_with_short_row
|
607
|
+
input= "
|
608
|
+
DIMENSIONS NCHAR=2;
|
609
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
610
|
+
CHARSTATELABELS
|
611
|
+
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger;
|
612
|
+
MATRIX
|
613
|
+
Dictyna 0?
|
614
|
+
Uloborus ??
|
615
|
+
Deinopis 0
|
616
|
+
;
|
617
|
+
END;"
|
618
|
+
|
619
|
+
builder = NexusParser::Builder.new
|
620
|
+
@lexer = NexusParser::Lexer.new(input)
|
621
|
+
|
622
|
+
# stub the taxa, they would otherwise get added in dimensions or taxa block
|
623
|
+
(0..2).each{|i| builder.stub_taxon}
|
624
|
+
|
625
|
+
assert_raise_with_message(NexusParser::ParseError, /too short/) {
|
626
|
+
NexusParser::Parser.new(@lexer, builder).parse_characters_blk
|
627
|
+
}
|
628
|
+
end
|
629
|
+
|
573
630
|
def test_characters_block_without_IDs_or_title
|
574
631
|
input= "
|
575
632
|
DIMENSIONS NCHAR=10;
|
@@ -589,7 +646,7 @@ class Test_Parser < Test::Unit::TestCase
|
|
589
646
|
Tetragnatha 0?01011011
|
590
647
|
|
591
648
|
;
|
592
|
-
|
649
|
+
ENDBLOCK;"
|
593
650
|
|
594
651
|
builder = NexusParser::Builder.new
|
595
652
|
@lexer = NexusParser::Lexer.new(input)
|
@@ -619,6 +676,55 @@ class Test_Parser < Test::Unit::TestCase
|
|
619
676
|
assert_equal 10, foo.characters.size
|
620
677
|
end
|
621
678
|
|
679
|
+
def test_characters_charlabels_statelabels_block
|
680
|
+
input= "
|
681
|
+
DIMENSIONS NCHAR=4;
|
682
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
683
|
+
CHARLABELS
|
684
|
+
Tibia_II
|
685
|
+
TII_macrosetae
|
686
|
+
'Femoral tuber'
|
687
|
+
_
|
688
|
+
;
|
689
|
+
STATELABELS
|
690
|
+
1 norm modified,
|
691
|
+
3 3 3.5 4,
|
692
|
+
4 pres
|
693
|
+
;
|
694
|
+
MATRIX
|
695
|
+
Dictyna -?1(01)
|
696
|
+
Uloborus 0321
|
697
|
+
;
|
698
|
+
ENDBLOCK;"
|
699
|
+
|
700
|
+
builder = NexusParser::Builder.new
|
701
|
+
lexer = NexusParser::Lexer.new(input)
|
702
|
+
|
703
|
+
(0..3).each{|i| builder.stub_taxon}
|
704
|
+
|
705
|
+
NexusParser::Parser.new(lexer,builder).parse_characters_blk
|
706
|
+
foo = builder.nexus_file
|
707
|
+
|
708
|
+
assert_equal 4, foo.characters.size
|
709
|
+
assert_equal "Femoral tuber", foo.characters[2].name
|
710
|
+
assert_equal "Undefined", foo.characters[3].name
|
711
|
+
|
712
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
713
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
714
|
+
|
715
|
+
assert_equal "", foo.characters[1].states["3"].name
|
716
|
+
|
717
|
+
assert_equal ["3", "3.5", "4"], foo.characters[2].states.keys.collect{|s| foo.characters[2].states[s].name}.sort
|
718
|
+
|
719
|
+
assert_equal "", foo.characters[1].states["3"].name
|
720
|
+
|
721
|
+
assert_equal ["-"], foo.codings[0][0].states
|
722
|
+
assert_equal ["?"], foo.codings[0][1].states
|
723
|
+
assert_equal ["0", "1"], foo.codings[0][3].states
|
724
|
+
|
725
|
+
assert_equal ["3"], foo.codings[1][1].states
|
726
|
+
end
|
727
|
+
|
622
728
|
def test_codings
|
623
729
|
foo = parse_nexus_file(@nf)
|
624
730
|
assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
|
@@ -655,6 +761,82 @@ class Test_Parser < Test::Unit::TestCase
|
|
655
761
|
# add test that nothing is left in lexer
|
656
762
|
end
|
657
763
|
|
764
|
+
def test_parse_format_respect_case
|
765
|
+
input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
|
766
|
+
builder = NexusParser::Builder.new
|
767
|
+
lexer = NexusParser::Lexer.new(input)
|
768
|
+
|
769
|
+
NexusParser::Parser.new(lexer,builder).parse_format
|
770
|
+
foo = builder.nexus_file
|
771
|
+
|
772
|
+
assert_equal "STANDARD", foo.vars[:datatype]
|
773
|
+
assert_equal "-", foo.vars[:gap]
|
774
|
+
assert_equal "?", foo.vars[:missing]
|
775
|
+
assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
|
776
|
+
end
|
777
|
+
|
778
|
+
# https://github.com/mjy/nexus_parser/issues/9
|
779
|
+
def test_three_both_numeric_and_label_state_names_in_a_row
|
780
|
+
input =" CHARSTATELABELS
|
781
|
+
1 'Metatarsal trichobothria (CodAra.29)' / 3 9 27 asdf;
|
782
|
+
Matrix
|
783
|
+
fooo 01 more stuff here that should not be hit"
|
784
|
+
|
785
|
+
builder = NexusParser::Builder.new
|
786
|
+
lexer = NexusParser::Lexer.new(input)
|
787
|
+
|
788
|
+
builder.stub_chr()
|
789
|
+
|
790
|
+
NexusParser::Parser.new(lexer, builder).parse_chr_state_labels
|
791
|
+
|
792
|
+
foo = builder.nexus_file
|
793
|
+
|
794
|
+
assert_equal "3", foo.characters[0].states['0'].name
|
795
|
+
assert_equal "9", foo.characters[0].states['1'].name
|
796
|
+
assert_equal "27", foo.characters[0].states['2'].name
|
797
|
+
assert_equal "asdf", foo.characters[0].states['3'].name
|
798
|
+
end
|
799
|
+
|
800
|
+
def test_non_label_character_name_character_labels
|
801
|
+
input = 'CHARSTATELABELS
|
802
|
+
1 (intentionally_blank) /,
|
803
|
+
2 /,
|
804
|
+
3 %_coverage /,
|
805
|
+
4 #_of_widgets /,
|
806
|
+
5 !endangered! /,
|
807
|
+
6 @the_front /,
|
808
|
+
7 =antennae,
|
809
|
+
8 `a_=_2` /,
|
810
|
+
9 -35_or-36 ,
|
811
|
+
10 27_or_less /,
|
812
|
+
11 fine_not_fine /,
|
813
|
+
12 3,
|
814
|
+
;'
|
815
|
+
|
816
|
+
builder = NexusParser::Builder.new
|
817
|
+
lexer = NexusParser::Lexer.new(input)
|
818
|
+
|
819
|
+
(0..11).each{builder.stub_chr()}
|
820
|
+
|
821
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
822
|
+
|
823
|
+
foo = builder.nexus_file
|
824
|
+
|
825
|
+
assert_equal 12, foo.characters.size
|
826
|
+
assert_equal "(intentionally_blank)", foo.characters[0].name
|
827
|
+
assert_equal "Undefined", foo.characters[1].name
|
828
|
+
assert_equal "%_coverage", foo.characters[2].name
|
829
|
+
assert_equal "#_of_widgets", foo.characters[3].name
|
830
|
+
assert_equal "!endangered!", foo.characters[4].name
|
831
|
+
assert_equal "@the_front", foo.characters[5].name
|
832
|
+
assert_equal "=antennae", foo.characters[6].name # =3
|
833
|
+
assert_equal "`a_=_2`", foo.characters[7].name
|
834
|
+
assert_equal "-35_or-36", foo.characters[8].name
|
835
|
+
assert_equal "27_or_less", foo.characters[9].name
|
836
|
+
assert_equal "fine_not_fine", foo.characters[10].name
|
837
|
+
assert_equal "3", foo.characters[11].name
|
838
|
+
end
|
839
|
+
|
658
840
|
def test_parse_chr_state_labels
|
659
841
|
input =" CHARSTATELABELS
|
660
842
|
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
@@ -736,6 +918,169 @@ class Test_Parser < Test::Unit::TestCase
|
|
736
918
|
|
737
919
|
end
|
738
920
|
|
921
|
+
def test_parse_chr_labels
|
922
|
+
input =" CHARLABELS
|
923
|
+
_
|
924
|
+
'Maxillary teeth'
|
925
|
+
as_df
|
926
|
+
'Highest number of maxillary teeth (or alveoli):';
|
927
|
+
STATELABELS
|
928
|
+
1 more more more,"
|
929
|
+
|
930
|
+
builder = NexusParser::Builder.new
|
931
|
+
lexer = NexusParser::Lexer.new(input)
|
932
|
+
|
933
|
+
(0..3).each{builder.stub_chr()}
|
934
|
+
|
935
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_labels
|
936
|
+
|
937
|
+
foo = builder.nexus_file
|
938
|
+
assert_equal 4, foo.characters.size
|
939
|
+
assert_equal 'Undefined', foo.characters[0].name
|
940
|
+
assert_equal 'Maxillary teeth', foo.characters[1].name
|
941
|
+
assert_equal 'as_df', foo.characters[2].name
|
942
|
+
assert_equal 'Highest number of maxillary teeth (or alveoli):', foo.characters[3].name
|
943
|
+
end
|
944
|
+
|
945
|
+
def test_parse_state_labels
|
946
|
+
input =" STATELABELS
|
947
|
+
1 norm modified,
|
948
|
+
3,
|
949
|
+
4 pres
|
950
|
+
;
|
951
|
+
CHARLABELS;
|
952
|
+
"
|
953
|
+
|
954
|
+
builder = NexusParser::Builder.new
|
955
|
+
lexer = NexusParser::Lexer.new(input)
|
956
|
+
|
957
|
+
(0..3).each{builder.stub_chr()}
|
958
|
+
|
959
|
+
NexusParser::Parser.new(lexer,builder).parse_state_labels
|
960
|
+
|
961
|
+
foo = builder.nexus_file
|
962
|
+
assert_equal 4, foo.characters.size
|
963
|
+
|
964
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
965
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
966
|
+
|
967
|
+
assert_empty foo.characters[1].states
|
968
|
+
|
969
|
+
assert_empty foo.characters[2].states
|
970
|
+
|
971
|
+
assert_equal "pres", foo.characters[3].states["0"].name
|
972
|
+
end
|
973
|
+
|
974
|
+
def test_non_label_character_state_character_labels
|
975
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
976
|
+
.5
|
977
|
+
.1.2_form
|
978
|
+
idsimple
|
979
|
+
%_of_length_less_than_10
|
980
|
+
!poisonous!
|
981
|
+
#_is_3_or_4
|
982
|
+
(leave_as_is)
|
983
|
+
@12_o_clock
|
984
|
+
>2
|
985
|
+
~equal
|
986
|
+
=9
|
987
|
+
;'
|
988
|
+
|
989
|
+
builder = NexusParser::Builder.new
|
990
|
+
lexer = NexusParser::Lexer.new(input)
|
991
|
+
|
992
|
+
builder.stub_chr()
|
993
|
+
|
994
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
995
|
+
|
996
|
+
foo = builder.nexus_file
|
997
|
+
|
998
|
+
assert_equal ".5", foo.characters[0].states["0"].name
|
999
|
+
assert_equal ".1.2_form", foo.characters[0].states["1"].name
|
1000
|
+
assert_equal "idsimple", foo.characters[0].states["2"].name
|
1001
|
+
assert_equal "%_of_length_less_than_10", foo.characters[0].states["3"].name
|
1002
|
+
assert_equal "!poisonous!", foo.characters[0].states["4"].name
|
1003
|
+
assert_equal "#_is_3_or_4", foo.characters[0].states["5"].name
|
1004
|
+
assert_equal "(leave_as_is)", foo.characters[0].states["6"].name
|
1005
|
+
assert_equal "@12_o_clock", foo.characters[0].states["7"].name
|
1006
|
+
assert_equal ">2", foo.characters[0].states["8"].name
|
1007
|
+
assert_equal "~equal", foo.characters[0].states["9"].name
|
1008
|
+
assert_equal "=9", foo.characters[0].states["10"].name
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
def test_arbitrary_quote_and_quotelike_character_state_labels
|
1012
|
+
# We could tighten up our handling of accidentally unclosed quotes, but
|
1013
|
+
# there's pretty much no way to recover in general, so we're not testing
|
1014
|
+
# them here.
|
1015
|
+
# Things like ""asdf" " failing is a known issue (maybe not solvable with
|
1016
|
+
# regular expressions?).
|
1017
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1018
|
+
"asd, \'f\'"
|
1019
|
+
""a\'sdf "
|
1020
|
+
\' /as"df/\'
|
1021
|
+
\'asdf;\'
|
1022
|
+
""as, df""
|
1023
|
+
;'
|
1024
|
+
|
1025
|
+
builder = NexusParser::Builder.new
|
1026
|
+
lexer = NexusParser::Lexer.new(input)
|
1027
|
+
|
1028
|
+
builder.stub_chr()
|
1029
|
+
|
1030
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1031
|
+
|
1032
|
+
foo = builder.nexus_file
|
1033
|
+
|
1034
|
+
assert_equal 'asd, \'f\'', foo.characters[0].states["0"].name
|
1035
|
+
assert_equal '"a\'sdf', foo.characters[0].states["1"].name
|
1036
|
+
assert_equal '/as"df/', foo.characters[0].states["2"].name
|
1037
|
+
assert_equal 'asdf;', foo.characters[0].states["3"].name
|
1038
|
+
assert_equal '"as, df"', foo.characters[0].states["4"].name
|
1039
|
+
end
|
1040
|
+
|
1041
|
+
|
1042
|
+
def test_number_label_chr_state_labels
|
1043
|
+
# Character state names that start with numbers
|
1044
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1045
|
+
123abc
|
1046
|
+
-1.23abc
|
1047
|
+
-3e-3abc
|
1048
|
+
25%_or_less_than
|
1049
|
+
;'
|
1050
|
+
|
1051
|
+
builder = NexusParser::Builder.new
|
1052
|
+
lexer = NexusParser::Lexer.new(input)
|
1053
|
+
|
1054
|
+
(0..3).each{builder.stub_chr()}
|
1055
|
+
|
1056
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1057
|
+
|
1058
|
+
foo = builder.nexus_file
|
1059
|
+
|
1060
|
+
assert_equal "123abc", foo.characters[0].states["0"].name
|
1061
|
+
assert_equal "-1.23abc", foo.characters[0].states["1"].name
|
1062
|
+
assert_equal "-3e-3abc", foo.characters[0].states["2"].name
|
1063
|
+
assert_equal "25%_or_less_than", foo.characters[0].states["3"].name
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
def test_value_pair_label_chr_state_labels
|
1067
|
+
# Character state names that are ValuePairs
|
1068
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1069
|
+
234=(a_b_c)
|
1070
|
+
;'
|
1071
|
+
|
1072
|
+
builder = NexusParser::Builder.new
|
1073
|
+
lexer = NexusParser::Lexer.new(input)
|
1074
|
+
|
1075
|
+
builder.stub_chr()
|
1076
|
+
|
1077
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1078
|
+
|
1079
|
+
foo = builder.nexus_file
|
1080
|
+
|
1081
|
+
assert_equal '234=(a_b_c)', foo.characters[0].states["0"].name
|
1082
|
+
end
|
1083
|
+
|
739
1084
|
def DONT_test_parse_really_long_string_of_chr_state_labels
|
740
1085
|
input =" CHARSTATELABELS
|
741
1086
|
1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 
60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 
67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 
54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 
48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 
38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 
68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nexus_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mjy
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
128
|
- !ruby/object:Gem::Version
|
129
129
|
version: '0'
|
130
130
|
requirements: []
|
131
|
-
rubygems_version: 3.5.
|
131
|
+
rubygems_version: 3.5.9
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: A Nexus file format (phylogenetic inference) parser in Ruby.
|