nexus_parser 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,18 +35,18 @@ class Test_Lexer < Test::Unit::TestCase
35
35
  def test_lexer
36
36
  lexer = NexusParser::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
37
37
  assert lexer.pop(NexusParser::Tokens::LBracket)
38
- assert id = lexer.pop(NexusParser::Tokens::ID)
38
+ assert id = lexer.pop(NexusParser::Tokens::Label)
39
39
  assert_equal(id.value, "foo")
40
40
  assert lexer.pop(NexusParser::Tokens::RBracket)
41
41
  assert lexer.pop(NexusParser::Tokens::BeginBlk)
42
42
  assert lexer.pop(NexusParser::Tokens::TaxaBlk)
43
- assert foo = lexer.pop(NexusParser::Tokens::ID)
43
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
44
44
  assert_equal("BLORF", foo.value) # truncating whitespace
45
45
  assert lexer.pop(NexusParser::Tokens::BlkEnd)
46
46
 
47
47
  lexer2 = NexusParser::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
48
48
  assert lexer2.pop(NexusParser::Tokens::LBracket)
49
- assert id = lexer2.pop(NexusParser::Tokens::ID)
49
+ assert id = lexer2.pop(NexusParser::Tokens::Label)
50
50
  assert_equal(id.value, "foo")
51
51
  assert lexer2.pop(NexusParser::Tokens::RBracket)
52
52
  assert lexer2.pop(NexusParser::Tokens::BeginBlk)
@@ -56,46 +56,52 @@ class Test_Lexer < Test::Unit::TestCase
56
56
  assert lexer2.pop(NexusParser::Tokens::LParen)
57
57
  assert lexer2.pop(NexusParser::Tokens::RParen)
58
58
 
59
+ lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
60
+ assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
61
+ assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
62
+ assert lexer2a.pop(NexusParser::Tokens::LBracket)
63
+ assert lexer2a.pop(NexusParser::Tokens::RBracket)
64
+
59
65
  lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
60
66
  assert lexer3.pop(NexusParser::Tokens::LBracket)
61
- assert id = lexer3.pop(NexusParser::Tokens::ID)
67
+ assert id = lexer3.pop(NexusParser::Tokens::Label)
62
68
  assert_equal(id.value, "foo")
63
69
  assert lexer3.pop(NexusParser::Tokens::RBracket)
64
70
  assert lexer3.pop(NexusParser::Tokens::BeginBlk)
65
71
  assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
66
- assert foo = lexer3.pop(NexusParser::Tokens::ID)
72
+ assert foo = lexer3.pop(NexusParser::Tokens::Label)
67
73
  assert_equal("BLORF", foo.value)
68
74
  assert lexer3.pop(NexusParser::Tokens::BlkEnd)
69
75
 
70
76
  lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
71
77
  assert lexer4.pop(NexusParser::Tokens::BeginBlk)
72
78
  assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
73
- assert foo = lexer4.pop(NexusParser::Tokens::Number)
74
- assert_equal(123123123, foo.value)
79
+ assert foo = lexer4.pop(NexusParser::Tokens::PositiveInteger)
80
+ assert_equal('123123123', foo.value)
75
81
  assert lexer4.pop(NexusParser::Tokens::BlkEnd)
76
82
 
77
83
  lexer5 = NexusParser::Lexer.new("(0,1)")
78
84
  assert lexer5.pop(NexusParser::Tokens::LParen)
79
- assert foo = lexer5.pop(NexusParser::Tokens::Number)
80
- assert_equal(0, foo.value)
85
+ assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
86
+ assert_equal('0', foo.value)
81
87
  assert lexer5.pop(NexusParser::Tokens::Comma)
82
- assert foo = lexer5.pop(NexusParser::Tokens::Number)
83
- assert_equal(1, foo.value)
88
+ assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
89
+ assert_equal('1', foo.value)
84
90
  assert lexer5.pop(NexusParser::Tokens::RParen)
85
91
 
86
92
  lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
87
93
  assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
88
94
  assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
89
95
 
90
- lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
96
+ lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{345}(0)(1 a)\n")
91
97
  assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
92
98
  assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
93
99
 
94
- lexer6b = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
100
+ lexer6b = NexusParser::Lexer.new(" 201(01){0 1}0100\x0A") # *nix line ending
95
101
  assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
96
102
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
97
103
 
98
- lexer6c = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # * dos line ending
104
+ lexer6c = NexusParser::Lexer.new(" 201{0 1}{01}0100\x0D\x0A") # * dos line ending
99
105
  assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
100
106
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
101
107
 
@@ -120,7 +126,41 @@ class Test_Lexer < Test::Unit::TestCase
120
126
  def test_row_vec
121
127
  lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
122
128
  assert foo = lexer.pop(NexusParser::Tokens::RowVec)
123
- assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
129
+ assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "B", "D", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
130
+ end
131
+
132
+ def test_ungrouped_spaces_in_row_vec
133
+ lexer = NexusParser::Lexer.new("- A 12(BC) ? \n")
134
+ assert foo = lexer.pop(NexusParser::Tokens::RowVec)
135
+ assert_equal(['-', 'A', '1', '2', ['B', 'C'], '?'], foo.value)
136
+ end
137
+
138
+ def test_mismatched_parens_row_vec
139
+ lexer = NexusParser::Lexer.new("01(12(13\n")
140
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
141
+ lexer.pop(NexusParser::Tokens::RowVec)
142
+ }
143
+ end
144
+
145
+ def test_mismatched_groupers_row_vec
146
+ lexer = NexusParser::Lexer.new("01(12}13\n")
147
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
148
+ lexer.pop(NexusParser::Tokens::RowVec)
149
+ }
150
+ end
151
+
152
+ def test_nested_parens_row_vec
153
+ lexer = NexusParser::Lexer.new("01(12(34))13\n")
154
+ assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
155
+ lexer.pop(NexusParser::Tokens::RowVec)
156
+ }
157
+ end
158
+
159
+ def test_unclosed_parens_row_vec
160
+ lexer = NexusParser::Lexer.new("01(123413\n")
161
+ assert_raise_with_message(NexusParser::ParseError, /Unclosed/) {
162
+ lexer.pop(NexusParser::Tokens::RowVec)
163
+ }
124
164
  end
125
165
 
126
166
  def test_punctuation
@@ -149,7 +189,7 @@ class Test_Lexer < Test::Unit::TestCase
149
189
  def test_EndBlk
150
190
  lexer = NexusParser::Lexer.new(" \n\n End ;")
151
191
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
152
- lexer = NexusParser::Lexer.new("\n\nEnd;")
192
+ lexer = NexusParser::Lexer.new("\n\nEndblock;")
153
193
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
154
194
 
155
195
  lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
@@ -401,13 +441,13 @@ class Test_Lexer < Test::Unit::TestCase
401
441
  CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
402
442
 
403
443
 
404
- END;
444
+ ENDBLOCK;
405
445
 
406
446
  BEGIN some other block;")
407
447
 
408
448
  assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
409
449
  assert_equal 'LABELS', foo.value.slice(0,6)
410
- assert_equal 'END;', foo.value.slice(-4,4)
450
+ assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
411
451
  end
412
452
 
413
453
  def test_SetsBlk
@@ -422,11 +462,6 @@ class Test_Lexer < Test::Unit::TestCase
422
462
  assert_equal 'SETS', foo.value.slice(0,4)
423
463
  assert_equal 'END;', foo.value.slice(-4,4)
424
464
  end
425
-
426
- def test_lexer_errors
427
- lexer = NexusParser::Lexer.new("*&")
428
- assert_raise(NexusParser::ParseError) {lexer.peek(NexusParser::Tokens::ID)}
429
- end
430
465
  end
431
466
 
432
467
 
@@ -513,8 +548,6 @@ class Test_Parser < Test::Unit::TestCase
513
548
  assert_equal "Tetragnatha", foo.taxa[9].name
514
549
  end
515
550
 
516
-
517
-
518
551
  def test_parse_characters_blk
519
552
  input= "
520
553
  TITLE 'Scharff&Coddington_1997_Araneidae';
@@ -570,6 +603,30 @@ class Test_Parser < Test::Unit::TestCase
570
603
  assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
571
604
  end
572
605
 
606
+ def test_matrix_with_short_row
607
+ input= "
608
+ DIMENSIONS NCHAR=2;
609
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
610
+ CHARSTATELABELS
611
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger;
612
+ MATRIX
613
+ Dictyna 0?
614
+ Uloborus ??
615
+ Deinopis 0
616
+ ;
617
+ END;"
618
+
619
+ builder = NexusParser::Builder.new
620
+ @lexer = NexusParser::Lexer.new(input)
621
+
622
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
623
+ (0..2).each{|i| builder.stub_taxon}
624
+
625
+ assert_raise_with_message(NexusParser::ParseError, /too short/) {
626
+ NexusParser::Parser.new(@lexer, builder).parse_characters_blk
627
+ }
628
+ end
629
+
573
630
  def test_characters_block_without_IDs_or_title
574
631
  input= "
575
632
  DIMENSIONS NCHAR=10;
@@ -589,7 +646,7 @@ class Test_Parser < Test::Unit::TestCase
589
646
  Tetragnatha 0?01011011
590
647
 
591
648
  ;
592
- END;"
649
+ ENDBLOCK;"
593
650
 
594
651
  builder = NexusParser::Builder.new
595
652
  @lexer = NexusParser::Lexer.new(input)
@@ -619,6 +676,55 @@ class Test_Parser < Test::Unit::TestCase
619
676
  assert_equal 10, foo.characters.size
620
677
  end
621
678
 
679
+ def test_characters_charlabels_statelabels_block
680
+ input= "
681
+ DIMENSIONS NCHAR=4;
682
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
683
+ CHARLABELS
684
+ Tibia_II
685
+ TII_macrosetae
686
+ 'Femoral tuber'
687
+ _
688
+ ;
689
+ STATELABELS
690
+ 1 norm modified,
691
+ 3 3 3.5 4,
692
+ 4 pres
693
+ ;
694
+ MATRIX
695
+ Dictyna -?1(01)
696
+ Uloborus 0321
697
+ ;
698
+ ENDBLOCK;"
699
+
700
+ builder = NexusParser::Builder.new
701
+ lexer = NexusParser::Lexer.new(input)
702
+
703
+ (0..3).each{|i| builder.stub_taxon}
704
+
705
+ NexusParser::Parser.new(lexer,builder).parse_characters_blk
706
+ foo = builder.nexus_file
707
+
708
+ assert_equal 4, foo.characters.size
709
+ assert_equal "Femoral tuber", foo.characters[2].name
710
+ assert_equal "Undefined", foo.characters[3].name
711
+
712
+ assert_equal "norm", foo.characters[0].states["0"].name
713
+ assert_equal "modified", foo.characters[0].states["1"].name
714
+
715
+ assert_equal "", foo.characters[1].states["3"].name
716
+
717
+ assert_equal ["3", "3.5", "4"], foo.characters[2].states.keys.collect{|s| foo.characters[2].states[s].name}.sort
718
+
719
+ assert_equal "", foo.characters[1].states["3"].name
720
+
721
+ assert_equal ["-"], foo.codings[0][0].states
722
+ assert_equal ["?"], foo.codings[0][1].states
723
+ assert_equal ["0", "1"], foo.codings[0][3].states
724
+
725
+ assert_equal ["3"], foo.codings[1][1].states
726
+ end
727
+
622
728
  def test_codings
623
729
  foo = parse_nexus_file(@nf)
624
730
  assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
@@ -655,6 +761,82 @@ class Test_Parser < Test::Unit::TestCase
655
761
  # add test that nothing is left in lexer
656
762
  end
657
763
 
764
+ def test_parse_format_respect_case
765
+ input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
766
+ builder = NexusParser::Builder.new
767
+ lexer = NexusParser::Lexer.new(input)
768
+
769
+ NexusParser::Parser.new(lexer,builder).parse_format
770
+ foo = builder.nexus_file
771
+
772
+ assert_equal "STANDARD", foo.vars[:datatype]
773
+ assert_equal "-", foo.vars[:gap]
774
+ assert_equal "?", foo.vars[:missing]
775
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
776
+ end
777
+
778
+ # https://github.com/mjy/nexus_parser/issues/9
779
+ def test_three_both_numeric_and_label_state_names_in_a_row
780
+ input =" CHARSTATELABELS
781
+ 1 'Metatarsal trichobothria (CodAra.29)' / 3 9 27 asdf;
782
+ Matrix
783
+ fooo 01 more stuff here that should not be hit"
784
+
785
+ builder = NexusParser::Builder.new
786
+ lexer = NexusParser::Lexer.new(input)
787
+
788
+ builder.stub_chr()
789
+
790
+ NexusParser::Parser.new(lexer, builder).parse_chr_state_labels
791
+
792
+ foo = builder.nexus_file
793
+
794
+ assert_equal "3", foo.characters[0].states['0'].name
795
+ assert_equal "9", foo.characters[0].states['1'].name
796
+ assert_equal "27", foo.characters[0].states['2'].name
797
+ assert_equal "asdf", foo.characters[0].states['3'].name
798
+ end
799
+
800
+ def test_non_label_character_name_character_labels
801
+ input = 'CHARSTATELABELS
802
+ 1 (intentionally_blank) /,
803
+ 2 /,
804
+ 3 %_coverage /,
805
+ 4 #_of_widgets /,
806
+ 5 !endangered! /,
807
+ 6 @the_front /,
808
+ 7 =antennae,
809
+ 8 `a_=_2` /,
810
+ 9 -35_or-36 ,
811
+ 10 27_or_less /,
812
+ 11 fine_not_fine /,
813
+ 12 3,
814
+ ;'
815
+
816
+ builder = NexusParser::Builder.new
817
+ lexer = NexusParser::Lexer.new(input)
818
+
819
+ (0..11).each{builder.stub_chr()}
820
+
821
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
822
+
823
+ foo = builder.nexus_file
824
+
825
+ assert_equal 12, foo.characters.size
826
+ assert_equal "(intentionally_blank)", foo.characters[0].name
827
+ assert_equal "Undefined", foo.characters[1].name
828
+ assert_equal "%_coverage", foo.characters[2].name
829
+ assert_equal "#_of_widgets", foo.characters[3].name
830
+ assert_equal "!endangered!", foo.characters[4].name
831
+ assert_equal "@the_front", foo.characters[5].name
832
+ assert_equal "=antennae", foo.characters[6].name # =3
833
+ assert_equal "`a_=_2`", foo.characters[7].name
834
+ assert_equal "-35_or-36", foo.characters[8].name
835
+ assert_equal "27_or_less", foo.characters[9].name
836
+ assert_equal "fine_not_fine", foo.characters[10].name
837
+ assert_equal "3", foo.characters[11].name
838
+ end
839
+
658
840
  def test_parse_chr_state_labels
659
841
  input =" CHARSTATELABELS
660
842
  1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
@@ -736,6 +918,169 @@ class Test_Parser < Test::Unit::TestCase
736
918
 
737
919
  end
738
920
 
921
+ def test_parse_chr_labels
922
+ input =" CHARLABELS
923
+ _
924
+ 'Maxillary teeth'
925
+ as_df
926
+ 'Highest number of maxillary teeth (or alveoli):';
927
+ STATELABELS
928
+ 1 more more more,"
929
+
930
+ builder = NexusParser::Builder.new
931
+ lexer = NexusParser::Lexer.new(input)
932
+
933
+ (0..3).each{builder.stub_chr()}
934
+
935
+ NexusParser::Parser.new(lexer,builder).parse_chr_labels
936
+
937
+ foo = builder.nexus_file
938
+ assert_equal 4, foo.characters.size
939
+ assert_equal 'Undefined', foo.characters[0].name
940
+ assert_equal 'Maxillary teeth', foo.characters[1].name
941
+ assert_equal 'as_df', foo.characters[2].name
942
+ assert_equal 'Highest number of maxillary teeth (or alveoli):', foo.characters[3].name
943
+ end
944
+
945
+ def test_parse_state_labels
946
+ input =" STATELABELS
947
+ 1 norm modified,
948
+ 3,
949
+ 4 pres
950
+ ;
951
+ CHARLABELS;
952
+ "
953
+
954
+ builder = NexusParser::Builder.new
955
+ lexer = NexusParser::Lexer.new(input)
956
+
957
+ (0..3).each{builder.stub_chr()}
958
+
959
+ NexusParser::Parser.new(lexer,builder).parse_state_labels
960
+
961
+ foo = builder.nexus_file
962
+ assert_equal 4, foo.characters.size
963
+
964
+ assert_equal "norm", foo.characters[0].states["0"].name
965
+ assert_equal "modified", foo.characters[0].states["1"].name
966
+
967
+ assert_empty foo.characters[1].states
968
+
969
+ assert_empty foo.characters[2].states
970
+
971
+ assert_equal "pres", foo.characters[3].states["0"].name
972
+ end
973
+
974
+ def test_non_label_character_state_character_labels
975
+ input = 'CHARSTATELABELS 1 Tibia_II /
976
+ .5
977
+ .1.2_form
978
+ idsimple
979
+ %_of_length_less_than_10
980
+ !poisonous!
981
+ #_is_3_or_4
982
+ (leave_as_is)
983
+ @12_o_clock
984
+ >2
985
+ ~equal
986
+ =9
987
+ ;'
988
+
989
+ builder = NexusParser::Builder.new
990
+ lexer = NexusParser::Lexer.new(input)
991
+
992
+ builder.stub_chr()
993
+
994
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
995
+
996
+ foo = builder.nexus_file
997
+
998
+ assert_equal ".5", foo.characters[0].states["0"].name
999
+ assert_equal ".1.2_form", foo.characters[0].states["1"].name
1000
+ assert_equal "idsimple", foo.characters[0].states["2"].name
1001
+ assert_equal "%_of_length_less_than_10", foo.characters[0].states["3"].name
1002
+ assert_equal "!poisonous!", foo.characters[0].states["4"].name
1003
+ assert_equal "#_is_3_or_4", foo.characters[0].states["5"].name
1004
+ assert_equal "(leave_as_is)", foo.characters[0].states["6"].name
1005
+ assert_equal "@12_o_clock", foo.characters[0].states["7"].name
1006
+ assert_equal ">2", foo.characters[0].states["8"].name
1007
+ assert_equal "~equal", foo.characters[0].states["9"].name
1008
+ assert_equal "=9", foo.characters[0].states["10"].name
1009
+ end
1010
+
1011
+ def test_arbitrary_quote_and_quotelike_character_state_labels
1012
+ # We could tighten up our handling of accidentally unclosed quotes, but
1013
+ # there's pretty much no way to recover in general, so we're not testing
1014
+ # them here.
1015
+ # Things like ""asdf" " failing is a known issue (maybe not solvable with
1016
+ # regular expressions?).
1017
+ input = 'CHARSTATELABELS 1 Tibia_II /
1018
+ "asd, \'f\'"
1019
+ ""a\'sdf "
1020
+ \' /as"df/\'
1021
+ \'asdf;\'
1022
+ ""as, df""
1023
+ ;'
1024
+
1025
+ builder = NexusParser::Builder.new
1026
+ lexer = NexusParser::Lexer.new(input)
1027
+
1028
+ builder.stub_chr()
1029
+
1030
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1031
+
1032
+ foo = builder.nexus_file
1033
+
1034
+ assert_equal 'asd, \'f\'', foo.characters[0].states["0"].name
1035
+ assert_equal '"a\'sdf', foo.characters[0].states["1"].name
1036
+ assert_equal '/as"df/', foo.characters[0].states["2"].name
1037
+ assert_equal 'asdf;', foo.characters[0].states["3"].name
1038
+ assert_equal '"as, df"', foo.characters[0].states["4"].name
1039
+ end
1040
+
1041
+
1042
+ def test_number_label_chr_state_labels
1043
+ # Character state names that start with numbers
1044
+ input = 'CHARSTATELABELS 1 Tibia_II /
1045
+ 123abc
1046
+ -1.23abc
1047
+ -3e-3abc
1048
+ 25%_or_less_than
1049
+ ;'
1050
+
1051
+ builder = NexusParser::Builder.new
1052
+ lexer = NexusParser::Lexer.new(input)
1053
+
1054
+ (0..3).each{builder.stub_chr()}
1055
+
1056
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1057
+
1058
+ foo = builder.nexus_file
1059
+
1060
+ assert_equal "123abc", foo.characters[0].states["0"].name
1061
+ assert_equal "-1.23abc", foo.characters[0].states["1"].name
1062
+ assert_equal "-3e-3abc", foo.characters[0].states["2"].name
1063
+ assert_equal "25%_or_less_than", foo.characters[0].states["3"].name
1064
+ end
1065
+
1066
+ def test_value_pair_label_chr_state_labels
1067
+ # Character state names that are ValuePairs
1068
+ input = 'CHARSTATELABELS 1 Tibia_II /
1069
+ 234=(a_b_c)
1070
+ ;'
1071
+
1072
+ builder = NexusParser::Builder.new
1073
+ lexer = NexusParser::Lexer.new(input)
1074
+
1075
+ builder.stub_chr()
1076
+
1077
+ NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
1078
+
1079
+ foo = builder.nexus_file
1080
+
1081
+ assert_equal '234=(a_b_c)', foo.characters[0].states["0"].name
1082
+ end
1083
+
739
1084
  def DONT_test_parse_really_long_string_of_chr_state_labels
740
1085
  input =" CHARSTATELABELS
741
1086
  1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nexus_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - mjy
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-04-03 00:00:00.000000000 Z
12
+ date: 2024-05-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
- rubygems_version: 3.5.3
131
+ rubygems_version: 3.5.9
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: A Nexus file format (phylogenetic inference) parser in Ruby.