nexus_parser 1.2.0 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -35,18 +35,18 @@ class Test_Lexer < Test::Unit::TestCase
35
35
  def test_lexer
36
36
  lexer = NexusParser::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
37
37
  assert lexer.pop(NexusParser::Tokens::LBracket)
38
- assert id = lexer.pop(NexusParser::Tokens::ID)
38
+ assert id = lexer.pop(NexusParser::Tokens::Label)
39
39
  assert_equal(id.value, "foo")
40
40
  assert lexer.pop(NexusParser::Tokens::RBracket)
41
41
  assert lexer.pop(NexusParser::Tokens::BeginBlk)
42
42
  assert lexer.pop(NexusParser::Tokens::TaxaBlk)
43
- assert foo = lexer.pop(NexusParser::Tokens::ID)
43
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
44
44
  assert_equal("BLORF", foo.value) # truncating whitespace
45
45
  assert lexer.pop(NexusParser::Tokens::BlkEnd)
46
46
 
47
47
  lexer2 = NexusParser::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
48
48
  assert lexer2.pop(NexusParser::Tokens::LBracket)
49
- assert id = lexer2.pop(NexusParser::Tokens::ID)
49
+ assert id = lexer2.pop(NexusParser::Tokens::Label)
50
50
  assert_equal(id.value, "foo")
51
51
  assert lexer2.pop(NexusParser::Tokens::RBracket)
52
52
  assert lexer2.pop(NexusParser::Tokens::BeginBlk)
@@ -56,46 +56,52 @@ class Test_Lexer < Test::Unit::TestCase
56
56
  assert lexer2.pop(NexusParser::Tokens::LParen)
57
57
  assert lexer2.pop(NexusParser::Tokens::RParen)
58
58
 
59
+ lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
60
+ assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
61
+ assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
62
+ assert lexer2a.pop(NexusParser::Tokens::LBracket)
63
+ assert lexer2a.pop(NexusParser::Tokens::RBracket)
64
+
59
65
  lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
60
66
  assert lexer3.pop(NexusParser::Tokens::LBracket)
61
- assert id = lexer3.pop(NexusParser::Tokens::ID)
67
+ assert id = lexer3.pop(NexusParser::Tokens::Label)
62
68
  assert_equal(id.value, "foo")
63
69
  assert lexer3.pop(NexusParser::Tokens::RBracket)
64
70
  assert lexer3.pop(NexusParser::Tokens::BeginBlk)
65
71
  assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
66
- assert foo = lexer3.pop(NexusParser::Tokens::ID)
72
+ assert foo = lexer3.pop(NexusParser::Tokens::Label)
67
73
  assert_equal("BLORF", foo.value)
68
74
  assert lexer3.pop(NexusParser::Tokens::BlkEnd)
69
75
 
70
76
  lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
71
77
  assert lexer4.pop(NexusParser::Tokens::BeginBlk)
72
78
  assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
73
- assert foo = lexer4.pop(NexusParser::Tokens::Number)
74
- assert_equal(123123123, foo.value)
79
+ assert foo = lexer4.pop(NexusParser::Tokens::PositiveInteger)
80
+ assert_equal('123123123', foo.value)
75
81
  assert lexer4.pop(NexusParser::Tokens::BlkEnd)
76
82
 
77
83
  lexer5 = NexusParser::Lexer.new("(0,1)")
78
84
  assert lexer5.pop(NexusParser::Tokens::LParen)
79
- assert foo = lexer5.pop(NexusParser::Tokens::Number)
80
- assert_equal(0, foo.value)
85
+ assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
86
+ assert_equal('0', foo.value)
81
87
  assert lexer5.pop(NexusParser::Tokens::Comma)
82
- assert foo = lexer5.pop(NexusParser::Tokens::Number)
83
- assert_equal(1, foo.value)
88
+ assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
89
+ assert_equal('1', foo.value)
84
90
  assert lexer5.pop(NexusParser::Tokens::RParen)
85
91
 
86
92
  lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
87
93
  assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
88
94
  assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
89
95
 
90
- lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
96
+ lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{345}(0)(1 a)\n")
91
97
  assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
92
98
  assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
93
99
 
94
- lexer6b = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
100
+ lexer6b = NexusParser::Lexer.new(" 201(01){0 1}0100\x0A") # *nix line ending
95
101
  assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
96
102
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
97
103
 
98
- lexer6c = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # * dos line ending
104
+ lexer6c = NexusParser::Lexer.new(" 201{0 1}{01}0100\x0D\x0A") # * dos line ending
99
105
  assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
100
106
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
101
107
 
@@ -120,7 +126,41 @@ class Test_Lexer < Test::Unit::TestCase
120
126
  def test_row_vec
121
127
  lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
122
128
  assert foo = lexer.pop(NexusParser::Tokens::RowVec)
123
- assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
129
+ assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "B", "D", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
130
+ end
131
+
132
+ def test_ungrouped_spaces_in_row_vec
133
+ lexer = NexusParser::Lexer.new("- A 12(BC) ? \n")
134
+ assert foo = lexer.pop(NexusParser::Tokens::RowVec)
135
+ assert_equal(['-', 'A', '1', '2', ['B', 'C'], '?'], foo.value)
136
+ end
137
+
138
+ def test_mismatched_parens_row_vec
139
+ lexer = NexusParser::Lexer.new("01(12(13\n")
140
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
141
+ lexer.pop(NexusParser::Tokens::RowVec)
142
+ }
143
+ end
144
+
145
+ def test_mismatched_groupers_row_vec
146
+ lexer = NexusParser::Lexer.new("01(12}13\n")
147
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
148
+ lexer.pop(NexusParser::Tokens::RowVec)
149
+ }
150
+ end
151
+
152
+ def test_nested_parens_row_vec
153
+ lexer = NexusParser::Lexer.new("01(12(34))13\n")
154
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
155
+ lexer.pop(NexusParser::Tokens::RowVec)
156
+ }
157
+ end
158
+
159
+ def test_unclosed_parens_row_vec
160
+ lexer = NexusParser::Lexer.new("01(123413\n")
161
+ assert_raise_with_message(NexusParser::ParseError, /Unclosed/) {
162
+ lexer.pop(NexusParser::Tokens::RowVec)
163
+ }
124
164
  end
125
165
 
126
166
  def test_punctuation
@@ -149,7 +189,7 @@ class Test_Lexer < Test::Unit::TestCase
149
189
  def test_EndBlk
150
190
  lexer = NexusParser::Lexer.new(" \n\n End ;")
151
191
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
152
- lexer = NexusParser::Lexer.new("\n\nEnd;")
192
+ lexer = NexusParser::Lexer.new("\n\nEndblock;")
153
193
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
154
194
 
155
195
  lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
@@ -401,13 +441,13 @@ class Test_Lexer < Test::Unit::TestCase
401
441
  CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
402
442
 
403
443
 
404
- END;
444
+ ENDBLOCK;
405
445
 
406
446
  BEGIN some other block;")
407
447
 
408
448
  assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
409
449
  assert_equal 'LABELS', foo.value.slice(0,6)
410
- assert_equal 'END;', foo.value.slice(-4,4)
450
+ assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
411
451
  end
412
452
 
413
453
  def test_SetsBlk
@@ -422,11 +462,6 @@ class Test_Lexer < Test::Unit::TestCase
422
462
  assert_equal 'SETS', foo.value.slice(0,4)
423
463
  assert_equal 'END;', foo.value.slice(-4,4)
424
464
  end
425
-
426
- def test_lexer_errors
427
- lexer = NexusParser::Lexer.new("*&")
428
- assert_raise(NexusParser::ParseError) {lexer.peek(NexusParser::Tokens::ID)}
429
- end
430
465
  end
431
466
 
432
467
 
@@ -513,8 +548,6 @@ class Test_Parser < Test::Unit::TestCase
513
548
  assert_equal "Tetragnatha", foo.taxa[9].name
514
549
  end
515
550
 
516
-
517
-
518
551
  def test_parse_characters_blk
519
552
  input= "
520
553
  TITLE 'Scharff&Coddington_1997_Araneidae';
@@ -570,6 +603,30 @@ class Test_Parser < Test::Unit::TestCase
570
603
  assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
571
604
  end
572
605
 
606
+ def test_matrix_with_short_row
607
+ input= "
608
+ DIMENSIONS NCHAR=2;
609
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
610
+ CHARSTATELABELS
611
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger;
612
+ MATRIX
613
+ Dictyna 0?
614
+ Uloborus ??
615
+ Deinopis 0
616
+ ;
617
+ END;"
618
+
619
+ builder = NexusParser::Builder.new
620
+ @lexer = NexusParser::Lexer.new(input)
621
+
622
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
623
+ (0..2).each{|i| builder.stub_taxon}
624
+
625
+ assert_raise_with_message(NexusParser::ParseError, /too short/) {
626
+ NexusParser::Parser.new(@lexer, builder).parse_characters_blk
627
+ }
628
+ end
629
+
573
630
  def test_characters_block_without_IDs_or_title
574
631
  input= "
575
632
  DIMENSIONS NCHAR=10;
@@ -589,7 +646,7 @@ class Test_Parser < Test::Unit::TestCase
589
646
  Tetragnatha 0?01011011
590
647
 
591
648
  ;
592
- END;"
649
+ ENDBLOCK;"
593
650
 
594
651
  builder = NexusParser::Builder.new
595
652
  @lexer = NexusParser::Lexer.new(input)
@@ -619,6 +676,55 @@ class Test_Parser < Test::Unit::TestCase
619
676
  assert_equal 10, foo.characters.size
620
677
  end
621
678
 
679
+ def test_characters_charlabels_statelabels_block
680
+ input= "
681
+ DIMENSIONS NCHAR=4;
682
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
683
+ CHARLABELS
684
+ Tibia_II
685
+ TII_macrosetae
686
+ 'Femoral tuber'
687
+ _
688
+ ;
689
+ STATELABELS
690
+ 1 norm modified,
691
+ 3 3 3.5 4,
692
+ 4 pres
693
+ ;
694
+ MATRIX
695
+ Dictyna -?1(01)
696
+ Uloborus 0321
697
+ ;
698
+ ENDBLOCK;"
699
+
700
+ builder = NexusParser::Builder.new
701
+ lexer = NexusParser::Lexer.new(input)
702
+
703
+ (0..3).each{|i| builder.stub_taxon}
704
+
705
+ NexusParser::Parser.new(lexer,builder).parse_characters_blk
706
+ foo = builder.nexus_file
707
+
708
+ assert_equal 4, foo.characters.size
709
+ assert_equal "Femoral tuber", foo.characters[2].name
710
+ assert_equal "Undefined", foo.characters[3].name
711
+
712
+ assert_equal "norm", foo.characters[0].states["0"].name
713
+ assert_equal "modified", foo.characters[0].states["1"].name
714
+
715
+ assert_equal "", foo.characters[1].states["3"].name
716
+
717
+ assert_equal ["3", "3.5", "4"], foo.characters[2].states.keys.collect{|s| foo.characters[2].states[s].name}.sort
718
+
719
+ assert_equal "", foo.characters[1].states["3"].name
720
+
721
+ assert_equal ["-"], foo.codings[0][0].states
722
+ assert_equal ["?"], foo.codings[0][1].states
723
+ assert_equal ["0", "1"], foo.codings[0][3].states
724
+
725
+ assert_equal ["3"], foo.codings[1][1].states
726
+ end
727
+
622
728
  def test_codings
623
729
  foo = parse_nexus_file(@nf)
624
730
  assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
@@ -655,6 +761,82 @@ class Test_Parser < Test::Unit::TestCase
655
761
  # add test that nothing is left in lexer
656
762
  end
657
763
 
764
+ def test_parse_format_respect_case
765
+ input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
766
+ builder = NexusParser::Builder.new
767
+ lexer = NexusParser::Lexer.new(input)
768
+
769
+ NexusParser::Parser.new(lexer,builder).parse_format
770
+ foo = builder.nexus_file
771
+
772
+ assert_equal "STANDARD", foo.vars[:datatype]
773
+ assert_equal "-", foo.vars[:gap]
774
+ assert_equal "?", foo.vars[:missing]
775
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
776
+ end
777
+
778
+ # https://github.com/mjy/nexus_parser/issues/9
779
+ def test_three_both_numeric_and_label_state_names_in_a_row
780
+ input =" CHARSTATELABELS
781
+ 1 'Metatarsal trichobothria (CodAra.29)' / 3 9 27 asdf;
782
+ Matrix
783
+ fooo 01 more stuff here that should not be hit"
784
+
785
+ builder = NexusParser::Builder.new
786
+ lexer = NexusParser::Lexer.new(input)
787
+
788
+ builder.stub_chr()
789
+
790
+ NexusParser::Parser.new(lexer, builder).parse_chr_state_labels
791
+
792
+ foo = builder.nexus_file
793
+
794
+ assert_equal "3", foo.characters[0].states['0'].name
795
+ assert_equal "9", foo.characters[0].states['1'].name
796
+ assert_equal "27", foo.characters[0].states['2'].name
797
+ assert_equal "asdf", foo.characters[0].states['3'].name
798
+ end
799
+
800
+ def test_non_label_character_name_character_labels
801
+ input = 'CHARSTATELABELS
802
+ 1 (intentionally_blank) /,
803
+ 2 /,
804
+ 3 %_coverage /,
805
+ 4 #_of_widgets /,
806
+ 5 !endangered! /,
807
+ 6 @the_front /,
808
+ 7 =antennae,
809
+ 8 `a_=_2` /,
810
+ 9 -35_or-36 ,
811
+ 10 27_or_less /,
812
+ 11 fine_not_fine /,
813
+ 12 3,
814
+ ;'
815
+
816
+ builder = NexusParser::Builder.new
817
+ lexer = NexusParser::Lexer.new(input)
818
+
819
+ (0..11).each{builder.stub_chr()}
820
+
821
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
822
+
823
+ foo = builder.nexus_file
824
+
825
+ assert_equal 12, foo.characters.size
826
+ assert_equal "(intentionally_blank)", foo.characters[0].name
827
+ assert_equal "Undefined", foo.characters[1].name
828
+ assert_equal "%_coverage", foo.characters[2].name
829
+ assert_equal "#_of_widgets", foo.characters[3].name
830
+ assert_equal "!endangered!", foo.characters[4].name
831
+ assert_equal "@the_front", foo.characters[5].name
832
+ assert_equal "=antennae", foo.characters[6].name # =3
833
+ assert_equal "`a_=_2`", foo.characters[7].name
834
+ assert_equal "-35_or-36", foo.characters[8].name
835
+ assert_equal "27_or_less", foo.characters[9].name
836
+ assert_equal "fine_not_fine", foo.characters[10].name
837
+ assert_equal "3", foo.characters[11].name
838
+ end
839
+
658
840
  def test_parse_chr_state_labels
659
841
  input =" CHARSTATELABELS
660
842
  1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
@@ -736,6 +918,169 @@ class Test_Parser < Test::Unit::TestCase
736
918
 
737
919
  end
738
920
 
921
+ def test_parse_chr_labels
922
+ input =" CHARLABELS
923
+ _
924
+ 'Maxillary teeth'
925
+ as_df
926
+ 'Highest number of maxillary teeth (or alveoli):';
927
+ STATELABELS
928
+ 1 more more more,"
929
+
930
+ builder = NexusParser::Builder.new
931
+ lexer = NexusParser::Lexer.new(input)
932
+
933
+ (0..3).each{builder.stub_chr()}
934
+
935
+ NexusParser::Parser.new(lexer,builder).parse_chr_labels
936
+
937
+ foo = builder.nexus_file
938
+ assert_equal 4, foo.characters.size
939
+ assert_equal 'Undefined', foo.characters[0].name
940
+ assert_equal 'Maxillary teeth', foo.characters[1].name
941
+ assert_equal 'as_df', foo.characters[2].name
942
+ assert_equal 'Highest number of maxillary teeth (or alveoli):', foo.characters[3].name
943
+ end
944
+
945
+ def test_parse_state_labels
946
+ input =" STATELABELS
947
+ 1 norm modified,
948
+ 3,
949
+ 4 pres
950
+ ;
951
+ CHARLABELS;
952
+ "
953
+
954
+ builder = NexusParser::Builder.new
955
+ lexer = NexusParser::Lexer.new(input)
956
+
957
+ (0..3).each{builder.stub_chr()}
958
+
959
+ NexusParser::Parser.new(lexer,builder).parse_state_labels
960
+
961
+ foo = builder.nexus_file
962
+ assert_equal 4, foo.characters.size
963
+
964
+ assert_equal "norm", foo.characters[0].states["0"].name
965
+ assert_equal "modified", foo.characters[0].states["1"].name
966
+
967
+ assert_empty foo.characters[1].states
968
+
969
+ assert_empty foo.characters[2].states
970
+
971
+ assert_equal "pres", foo.characters[3].states["0"].name
972
+ end
973
+
974
+ def test_non_label_character_state_character_labels
975
+ input = 'CHARSTATELABELS 1 Tibia_II /
976
+ .5
977
+ .1.2_form
978
+ idsimple
979
+ %_of_length_less_than_10
980
+ !poisonous!
981
+ #_is_3_or_4
982
+ (leave_as_is)
983
+ @12_o_clock
984
+ >2
985
+ ~equal
986
+ =9
987
+ ;'
988
+
989
+ builder = NexusParser::Builder.new
990
+ lexer = NexusParser::Lexer.new(input)
991
+
992
+ builder.stub_chr()
993
+
994
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
995
+
996
+ foo = builder.nexus_file
997
+
998
+ assert_equal ".5", foo.characters[0].states["0"].name
999
+ assert_equal ".1.2_form", foo.characters[0].states["1"].name
1000
+ assert_equal "idsimple", foo.characters[0].states["2"].name
1001
+ assert_equal "%_of_length_less_than_10", foo.characters[0].states["3"].name
1002
+ assert_equal "!poisonous!", foo.characters[0].states["4"].name
1003
+ assert_equal "#_is_3_or_4", foo.characters[0].states["5"].name
1004
+ assert_equal "(leave_as_is)", foo.characters[0].states["6"].name
1005
+ assert_equal "@12_o_clock", foo.characters[0].states["7"].name
1006
+ assert_equal ">2", foo.characters[0].states["8"].name
1007
+ assert_equal "~equal", foo.characters[0].states["9"].name
1008
+ assert_equal "=9", foo.characters[0].states["10"].name
1009
+ end
1010
+
1011
+ def test_arbitrary_quote_and_quotelike_character_state_labels
1012
+ # We could tighten up our handling of accidentally unclosed quotes, but
1013
+ # there's pretty much no way to recover in general, so we're not testing
1014
+ # them here.
1015
+ # Things like ""asdf" " failing is a known issue (maybe not solvable with
1016
+ # regular expressions?).
1017
+ input = 'CHARSTATELABELS 1 Tibia_II /
1018
+ "asd, \'f\'"
1019
+ ""a\'sdf "
1020
+ \' /as"df/\'
1021
+ \'asdf;\'
1022
+ ""as, df""
1023
+ ;'
1024
+
1025
+ builder = NexusParser::Builder.new
1026
+ lexer = NexusParser::Lexer.new(input)
1027
+
1028
+ builder.stub_chr()
1029
+
1030
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1031
+
1032
+ foo = builder.nexus_file
1033
+
1034
+ assert_equal 'asd, \'f\'', foo.characters[0].states["0"].name
1035
+ assert_equal '"a\'sdf', foo.characters[0].states["1"].name
1036
+ assert_equal '/as"df/', foo.characters[0].states["2"].name
1037
+ assert_equal 'asdf;', foo.characters[0].states["3"].name
1038
+ assert_equal '"as, df"', foo.characters[0].states["4"].name
1039
+ end
1040
+
1041
+
1042
+ def test_number_label_chr_state_labels
1043
+ # Character state names that start with numbers
1044
+ input = 'CHARSTATELABELS 1 Tibia_II /
1045
+ 123abc
1046
+ -1.23abc
1047
+ -3e-3abc
1048
+ 25%_or_less_than
1049
+ ;'
1050
+
1051
+ builder = NexusParser::Builder.new
1052
+ lexer = NexusParser::Lexer.new(input)
1053
+
1054
+ (0..3).each{builder.stub_chr()}
1055
+
1056
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1057
+
1058
+ foo = builder.nexus_file
1059
+
1060
+ assert_equal "123abc", foo.characters[0].states["0"].name
1061
+ assert_equal "-1.23abc", foo.characters[0].states["1"].name
1062
+ assert_equal "-3e-3abc", foo.characters[0].states["2"].name
1063
+ assert_equal "25%_or_less_than", foo.characters[0].states["3"].name
1064
+ end
1065
+
1066
+ def test_value_pair_label_chr_state_labels
1067
+ # Character state names that are ValuePairs
1068
+ input = 'CHARSTATELABELS 1 Tibia_II /
1069
+ 234=(a_b_c)
1070
+ ;'
1071
+
1072
+ builder = NexusParser::Builder.new
1073
+ lexer = NexusParser::Lexer.new(input)
1074
+
1075
+ builder.stub_chr()
1076
+
1077
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1078
+
1079
+ foo = builder.nexus_file
1080
+
1081
+ assert_equal '234=(a_b_c)', foo.characters[0].states["0"].name
1082
+ end
1083
+
739
1084
  def DONT_test_parse_really_long_string_of_chr_state_labels
740
1085
  input =" CHARSTATELABELS
741
1086
  1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nexus_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - mjy
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-04-03 00:00:00.000000000 Z
12
+ date: 2024-05-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
- rubygems_version: 3.5.3
131
+ rubygems_version: 3.5.9
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: A Nexus file format (phylogenetic inference) parser in Ruby.