nexus_parser 1.1.4 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  require 'test/unit'
2
2
  require 'rubygems'
3
- require 'ruby-debug'
3
+ require 'byebug'
4
4
 
5
5
  require File.expand_path(File.join(File.dirname(__FILE__), '../lib/nexus_parser'))
6
6
 
@@ -28,7 +28,6 @@ class Test_Regex < Test::Unit::TestCase
28
28
  @regexp = Regexp.new(/\s*(Begin\s*taxa\s*;)\s*/i)
29
29
  assert txt =~ @regexp
30
30
  end
31
-
32
31
  end
33
32
 
34
33
 
@@ -57,6 +56,11 @@ class Test_Lexer < Test::Unit::TestCase
57
56
  assert lexer2.pop(NexusParser::Tokens::LParen)
58
57
  assert lexer2.pop(NexusParser::Tokens::RParen)
59
58
 
59
+ lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
60
+ assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
61
+ assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
62
+ assert lexer2a.pop(NexusParser::Tokens::LBracket)
63
+ assert lexer2a.pop(NexusParser::Tokens::RBracket)
60
64
 
61
65
  lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
62
66
  assert lexer3.pop(NexusParser::Tokens::LBracket)
@@ -66,52 +70,52 @@ class Test_Lexer < Test::Unit::TestCase
66
70
  assert lexer3.pop(NexusParser::Tokens::BeginBlk)
67
71
  assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
68
72
  assert foo = lexer3.pop(NexusParser::Tokens::ID)
69
- assert_equal("BLORF", foo.value)
73
+ assert_equal("BLORF", foo.value)
70
74
  assert lexer3.pop(NexusParser::Tokens::BlkEnd)
71
75
 
72
76
  lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
73
77
  assert lexer4.pop(NexusParser::Tokens::BeginBlk)
74
78
  assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
75
79
  assert foo = lexer4.pop(NexusParser::Tokens::Number)
76
- assert_equal(123123123, foo.value)
80
+ assert_equal(123123123, foo.value)
77
81
  assert lexer4.pop(NexusParser::Tokens::BlkEnd)
78
82
 
79
83
  lexer5 = NexusParser::Lexer.new("(0,1)")
80
84
  assert lexer5.pop(NexusParser::Tokens::LParen)
81
85
  assert foo = lexer5.pop(NexusParser::Tokens::Number)
82
- assert_equal(0, foo.value)
86
+ assert_equal(0, foo.value)
83
87
  assert lexer5.pop(NexusParser::Tokens::Comma)
84
88
  assert foo = lexer5.pop(NexusParser::Tokens::Number)
85
- assert_equal(1, foo.value)
89
+ assert_equal(1, foo.value)
86
90
  assert lexer5.pop(NexusParser::Tokens::RParen)
87
91
 
88
92
  lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
89
93
  assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
90
- assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
94
+ assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
91
95
 
92
96
  lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
93
97
  assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
94
- assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
95
-
98
+ assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
99
+
96
100
  lexer6b = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
97
101
  assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
98
- assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
102
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
99
103
 
100
104
  lexer6c = NexusParser::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # * dos line ending
101
105
  assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
102
- assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
106
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
103
107
 
104
108
 
105
109
  lexer7 = NexusParser::Lexer.new("read nothing till Nexus, not that nexus 13243 Block [] ();, this one: #nexus FOO")
106
110
  assert foo = lexer7.pop(NexusParser::Tokens::NexusStart)
107
- assert_equal('#nexus', foo.value)
111
+ assert_equal('#nexus', foo.value)
108
112
 
109
113
 
110
114
  ## we strip comments before parsing now
111
115
  # lexer8 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
112
116
  # assert foo = lexer8.pop(NexusParser::Tokens::NexusComment)
113
117
  # assert_equal "foo", foo.value
114
-
118
+
115
119
  # assert lexer.pop(NexusParser::Tokens::Colon)
116
120
  # assert num = lexer.pop(NexusParser::Tokens::Number)
117
121
  # assert_equal(num.value, 0.0)
@@ -122,7 +126,7 @@ class Test_Lexer < Test::Unit::TestCase
122
126
  def test_row_vec
123
127
  lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
124
128
  assert foo = lexer.pop(NexusParser::Tokens::RowVec)
125
- assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
129
+ assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
126
130
  end
127
131
 
128
132
  def test_punctuation
@@ -145,15 +149,15 @@ class Test_Lexer < Test::Unit::TestCase
145
149
  def test_tax_labels
146
150
  lexer = NexusParser::Lexer.new("Taxlabels 'foo' bar blorf \"stuff things\" stuff 'and foo';")
147
151
  assert foo = lexer.pop(NexusParser::Tokens::Taxlabels)
148
- assert_equal("Taxlabels ", foo.value)
152
+ assert_equal("Taxlabels ", foo.value)
149
153
  end
150
154
 
151
155
  def test_EndBlk
152
156
  lexer = NexusParser::Lexer.new(" \n\n End ;")
153
157
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
154
- lexer = NexusParser::Lexer.new("\n\nEnd;")
158
+ lexer = NexusParser::Lexer.new("\n\nEndblock;")
155
159
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
156
-
160
+
157
161
  lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
158
162
  assert !lexer.peek(NexusParser::Tokens::EndBlk)
159
163
  lexer = NexusParser::Lexer.new("this is not an \"end\"\n\nEnd;")
@@ -167,27 +171,27 @@ class Test_Lexer < Test::Unit::TestCase
167
171
  end
168
172
 
169
173
  def test_label
170
- lexer = NexusParser::Lexer.new(' \'foo\' bar, blorf; "stuff things" stuff \'and foo\' 23434 ""asdf"" \'Foo_And_Stuff\' ')
171
- assert foo = lexer.pop(NexusParser::Tokens::Label)
172
- assert_equal "foo", foo.value
173
- assert foo = lexer.pop(NexusParser::Tokens::Label)
174
- assert_equal "bar", foo.value
175
- assert lexer.pop(NexusParser::Tokens::Comma)
176
- assert foo = lexer.pop(NexusParser::Tokens::Label)
177
- assert_equal "blorf", foo.value
178
- assert lexer.pop(NexusParser::Tokens::SemiColon)
179
- assert foo = lexer.pop(NexusParser::Tokens::Label)
180
- assert_equal "stuff things", foo.value
181
- assert foo = lexer.pop(NexusParser::Tokens::Label)
182
- assert_equal "stuff", foo.value
183
- assert foo = lexer.pop(NexusParser::Tokens::Label)
184
- assert_equal "and foo", foo.value
185
- assert foo = lexer.pop(NexusParser::Tokens::Label)
186
- assert_equal "23434", foo.value
187
- assert foo = lexer.pop(NexusParser::Tokens::Label)
188
- assert_equal '"asdf"', foo.value
189
- assert foo = lexer.pop(NexusParser::Tokens::Label)
190
- assert_equal 'Foo_And_Stuff', foo.value
174
+ lexer = NexusParser::Lexer.new(' \'foo\' bar, blorf; "stuff things" stuff \'and foo\' 23434 ""asdf"" \'Foo_And_Stuff\' ')
175
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
176
+ assert_equal "foo", foo.value
177
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
178
+ assert_equal "bar", foo.value
179
+ assert lexer.pop(NexusParser::Tokens::Comma)
180
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
181
+ assert_equal "blorf", foo.value
182
+ assert lexer.pop(NexusParser::Tokens::SemiColon)
183
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
184
+ assert_equal "stuff things", foo.value
185
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
186
+ assert_equal "stuff", foo.value
187
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
188
+ assert_equal "and foo", foo.value
189
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
190
+ assert_equal "23434", foo.value
191
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
192
+ assert_equal '"asdf"', foo.value
193
+ assert foo = lexer.pop(NexusParser::Tokens::Label)
194
+ assert_equal 'Foo_And_Stuff', foo.value
191
195
  end
192
196
 
193
197
  def test_odd_labels
@@ -219,14 +223,14 @@ class Test_Lexer < Test::Unit::TestCase
219
223
 
220
224
 
221
225
  def test_dimensions
222
- input = " DIMENSIONS NCHAR= 10"
226
+ input = " DIMENSIONS NCHAR= 10"
223
227
  lexer = NexusParser::Lexer.new(input)
224
228
  assert foo = lexer.pop(NexusParser::Tokens::Dimensions)
225
229
  assert_equal "DIMENSIONS", foo.value
226
230
  end
227
231
 
228
232
  def test_format
229
- input = " format NCHAR= 10"
233
+ input = " format NCHAR= 10"
230
234
  lexer = NexusParser::Lexer.new(input)
231
235
  assert foo = lexer.pop(NexusParser::Tokens::Format)
232
236
  assert_equal "format", foo.value
@@ -234,7 +238,7 @@ class Test_Lexer < Test::Unit::TestCase
234
238
 
235
239
  def test_odd_value_pair
236
240
  lexer = NexusParser::Lexer.new(" TEXT CHARACTER = 3 TEXT = A62.003;
237
-
241
+
238
242
  TEXT CHARACTER = 4 TEXT = A62.004; \n end; ")
239
243
  assert foo = lexer.pop(NexusParser::Tokens::Label)
240
244
  assert foo = lexer.pop(NexusParser::Tokens::ValuePair)
@@ -299,38 +303,38 @@ class Test_Lexer < Test::Unit::TestCase
299
303
  assert foo = lexer6.pop(NexusParser::Tokens::ValuePair)
300
304
  smorf = {:missing => '-'}
301
305
  assert_equal smorf, foo.value
302
-
306
+
303
307
  lexer6a = NexusParser::Lexer.new("ntaxa=1;\n")
304
308
  assert foo = lexer6a.pop(NexusParser::Tokens::ValuePair)
305
309
  smorf = {:ntaxa => '1'}
306
- assert_equal smorf, foo.value
310
+ assert_equal smorf, foo.value
307
311
 
308
312
  lexer7 = NexusParser::Lexer.new("ntaxa =1;\n")
309
313
  assert foo = lexer7.pop(NexusParser::Tokens::ValuePair)
310
314
  smorf = {:ntaxa => '1'}
311
- assert_equal smorf, foo.value
312
-
315
+ assert_equal smorf, foo.value
316
+
313
317
  lexer8 = NexusParser::Lexer.new(" ntaxa = 1 ;\n")
314
318
  assert foo = lexer8.pop(NexusParser::Tokens::ValuePair)
315
319
  smorf = {:ntaxa => '1'}
316
- assert_equal smorf, foo.value
320
+ assert_equal smorf, foo.value
317
321
 
318
322
  lexer9 = NexusParser::Lexer.new(" TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ")
319
323
  assert foo = lexer9.pop(NexusParser::Tokens::ValuePair)
320
324
  smorf = {:tf => "(CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!')" }
321
- assert_equal smorf, foo.value
322
-
325
+ assert_equal smorf, foo.value
326
+
323
327
  lexer10 = NexusParser::Lexer.new(" TF = (CM 'This is an value pair that has (parens) within the value, twice! ()') ; some stuff left here ")
324
328
  assert foo = lexer10.pop(NexusParser::Tokens::ValuePair)
325
329
  smorf = {:tf => "(CM 'This is an value pair that has (parens) within the value, twice! ()')" }
326
- assert_equal smorf, foo.value
327
-
330
+ assert_equal smorf, foo.value
331
+
328
332
  lexer11 = NexusParser::Lexer.new("CHARACTER = 1 TEXT = A62.001;")
329
333
  assert_equal true, !lexer11.peek(NexusParser::Tokens::SemiColon)
330
334
  assert_equal true, lexer11.peek(NexusParser::Tokens::ValuePair)
331
335
  assert foo = lexer11.pop(NexusParser::Tokens::ValuePair)
332
336
  smorf = {:character => "1" }
333
- assert_equal smorf, foo.value
337
+ assert_equal smorf, foo.value
334
338
  assert foo = lexer11.pop(NexusParser::Tokens::ValuePair)
335
339
  end
336
340
 
@@ -342,7 +346,7 @@ class Test_Lexer < Test::Unit::TestCase
342
346
  end
343
347
 
344
348
  def test_TreesBlk
345
- lexer = NexusParser::Lexer.new("BEGIN TREES;
349
+ lexer = NexusParser::Lexer.new("BEGIN TREES;
346
350
  Title Imported_trees;
347
351
  LINK Taxa = 'Scharff&Coddington_1997_Araneidae';
348
352
  TRANSLATE
@@ -376,7 +380,7 @@ class Test_Lexer < Test::Unit::TestCase
376
380
 
377
381
 
378
382
  END;")
379
-
383
+
380
384
  assert lexer.pop(NexusParser::Tokens::BeginBlk)
381
385
  assert foo = lexer.pop(NexusParser::Tokens::TreesBlk)
382
386
  assert_equal 'TREES', foo.value.slice(0,5)
@@ -387,39 +391,39 @@ class Test_Lexer < Test::Unit::TestCase
387
391
  end
388
392
 
389
393
  def test_NotesBlk
390
- input = "BEGIN NOTES ;"
394
+ input = "BEGIN NOTES ;"
391
395
  lexer = NexusParser::Lexer.new(input)
392
396
  assert lexer.pop(NexusParser::Tokens::BeginBlk)
393
397
  assert foo = lexer.pop(NexusParser::Tokens::NotesBlk)
394
398
  assert "NOTES", foo.value
395
399
  end
396
400
 
397
- def test_LabelsBlk
398
- lexer = NexusParser::Lexer.new("
399
- LABELS;
400
- CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
401
- CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
402
- CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
403
- CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
401
+ def test_LabelsBlk
402
+ lexer = NexusParser::Lexer.new("
403
+ LABELS;
404
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
405
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
406
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
407
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
404
408
 
405
409
 
406
- END;
410
+ ENDBLOCK;
411
+
412
+ BEGIN some other block;")
407
413
 
408
- BEGIN some other block;")
409
-
410
414
  assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
411
415
  assert_equal 'LABELS', foo.value.slice(0,6)
412
- assert_equal 'END;', foo.value.slice(-4,4)
416
+ assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
413
417
  end
414
418
 
415
- def test_SetsBlk
416
- lexer = NexusParser::Lexer.new("
417
- SETS;
418
- CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;
419
+ def test_SetsBlk
420
+ lexer = NexusParser::Lexer.new("
421
+ SETS;
422
+ CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;
423
+
424
+ END;
425
+ BEGIN some other block;")
419
426
 
420
- END;
421
- BEGIN some other block;")
422
-
423
427
  assert foo = lexer.pop(NexusParser::Tokens::SetsBlk)
424
428
  assert_equal 'SETS', foo.value.slice(0,4)
425
429
  assert_equal 'END;', foo.value.slice(-4,4)
@@ -441,17 +445,17 @@ class Test_Parser < Test::Unit::TestCase
441
445
  def teardown
442
446
  @nf = nil
443
447
  end
444
-
448
+
445
449
  def test_that_file_might_be_nexus
446
450
  begin
447
451
  assert !parse_nexus_file("#Nexblux Begin Natrix end;")
448
- rescue NexusParser::ParseError
452
+ rescue NexusParser::ParseError
449
453
  assert true
450
454
  end
451
455
  end
452
456
 
453
457
  def test_parse_initializes
454
- foo = parse_nexus_file(@nf)
458
+ parse_nexus_file(@nf)
455
459
  end
456
460
 
457
461
  def test_parse_file
@@ -460,7 +464,7 @@ class Test_Parser < Test::Unit::TestCase
460
464
 
461
465
  assert_equal 10, foo.taxa.size
462
466
  assert_equal 10, foo.characters.size
463
- assert_equal 10, foo.codings.size
467
+ assert_equal 10, foo.codings.size
464
468
  assert_equal 1, foo.taxa[1].notes.size # asserts that notes are parsing
465
469
  assert_equal "norm", foo.characters[0].states["0"].name
466
470
  assert_equal "modified", foo.characters[0].states["1"].name
@@ -471,11 +475,11 @@ class Test_Parser < Test::Unit::TestCase
471
475
 
472
476
  def test_taxa_block
473
477
  # we've popped off the header already
474
- input =
478
+ input =
475
479
  "TITLE 'Scharff&Coddington_1997_Araneidae';
476
480
  DIMENSIONS NTAX=10;
477
481
  TAXLABELS
478
- Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
482
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
479
483
  ;
480
484
  IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
481
485
  BLOCKID JC1191fcddc0c4;
@@ -484,7 +488,7 @@ class Test_Parser < Test::Unit::TestCase
484
488
  builder = NexusParser::Builder.new
485
489
  lexer = NexusParser::Lexer.new(input)
486
490
  NexusParser::Parser.new(lexer,builder).parse_taxa_blk
487
- foo = builder.nexus_file
491
+ foo = builder.nexus_file
488
492
 
489
493
  assert_equal 10, foo.taxa.size
490
494
  assert_equal "Dictyna", foo.taxa[0].name
@@ -495,18 +499,18 @@ class Test_Parser < Test::Unit::TestCase
495
499
 
496
500
  def test_taxa_block_without_IDS
497
501
  # we've popped off the header already
498
- input =
502
+ input =
499
503
  "TITLE 'Scharff&Coddington_1997_Araneidae';
500
504
  DIMENSIONS NTAX=10;
501
505
  TAXLABELS
502
- Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
506
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
503
507
  ;
504
508
  END;"
505
509
 
506
510
  builder = NexusParser::Builder.new
507
511
  lexer = NexusParser::Lexer.new(input)
508
512
  NexusParser::Parser.new(lexer,builder).parse_taxa_blk
509
- foo = builder.nexus_file
513
+ foo = builder.nexus_file
510
514
 
511
515
  assert_equal 10, foo.taxa.size
512
516
  assert_equal "Dictyna", foo.taxa[0].name
@@ -515,15 +519,13 @@ class Test_Parser < Test::Unit::TestCase
515
519
  assert_equal "Tetragnatha", foo.taxa[9].name
516
520
  end
517
521
 
518
-
519
-
520
522
  def test_parse_characters_blk
521
523
  input= "
522
524
  TITLE 'Scharff&Coddington_1997_Araneidae';
523
525
  DIMENSIONS NCHAR=10;
524
526
  FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
525
- CHARSTATELABELS
526
- 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
527
+ CHARSTATELABELS
528
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
527
529
  MATRIX
528
530
  Dictyna 0?00201001
529
531
  Uloborus 0?11000000
@@ -552,10 +554,10 @@ class Test_Parser < Test::Unit::TestCase
552
554
  (0..9).each{|i| builder.stub_taxon}
553
555
 
554
556
  NexusParser::Parser.new(@lexer,builder).parse_characters_blk
555
- foo = builder.nexus_file
556
-
557
+ foo = builder.nexus_file
558
+
557
559
  assert_equal 10, foo.characters.size
558
- assert_equal "Tibia_II", foo.characters[0].name
560
+ assert_equal "Tibia_II", foo.characters[0].name
559
561
  assert_equal "TII_macrosetae", foo.characters[1].name
560
562
 
561
563
  assert_equal "norm", foo.characters[0].states["0"].name
@@ -573,11 +575,11 @@ class Test_Parser < Test::Unit::TestCase
573
575
  end
574
576
 
575
577
  def test_characters_block_without_IDs_or_title
576
- input= "
578
+ input= "
577
579
  DIMENSIONS NCHAR=10;
578
580
  FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
579
- CHARSTATELABELS
580
- 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
581
+ CHARSTATELABELS
582
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
581
583
  MATRIX
582
584
  Dictyna 0?00201001
583
585
  Uloborus 0?11000000
@@ -591,7 +593,7 @@ class Test_Parser < Test::Unit::TestCase
591
593
  Tetragnatha 0?01011011
592
594
 
593
595
  ;
594
- END;"
596
+ ENDBLOCK;"
595
597
 
596
598
  builder = NexusParser::Builder.new
597
599
  @lexer = NexusParser::Lexer.new(input)
@@ -603,10 +605,10 @@ class Test_Parser < Test::Unit::TestCase
603
605
  (0..9).each{|i| builder.stub_taxon}
604
606
 
605
607
  NexusParser::Parser.new(@lexer,builder).parse_characters_blk
606
- foo = builder.nexus_file
607
-
608
+ foo = builder.nexus_file
609
+
608
610
  assert_equal 10, foo.characters.size
609
- assert_equal "Tibia_II", foo.characters[0].name
611
+ assert_equal "Tibia_II", foo.characters[0].name
610
612
  assert_equal "TII_macrosetae", foo.characters[1].name
611
613
  assert_equal "norm", foo.characters[0].states["0"].name
612
614
  assert_equal "modified", foo.characters[0].states["1"].name
@@ -618,21 +620,21 @@ class Test_Parser < Test::Unit::TestCase
618
620
 
619
621
  def test_characters_block_from_file
620
622
  foo = parse_nexus_file(@nf)
621
- assert 10, foo.characters.size
623
+ assert_equal 10, foo.characters.size
622
624
  end
623
625
 
624
626
  def test_codings
625
627
  foo = parse_nexus_file(@nf)
626
- assert 100, foo.codings.size # two multistates count in single cells
628
+ assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
627
629
  end
628
630
 
629
631
  def test_parse_dimensions
630
- input= " DIMENSIONS NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" blorf=2; "
632
+ input= " DIMENSIONS NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" blorf=2; "
631
633
  builder = NexusParser::Builder.new
632
634
  lexer = NexusParser::Lexer.new(input)
633
635
 
634
636
  NexusParser::Parser.new(lexer,builder).parse_dimensions
635
- foo = builder.nexus_file
637
+ foo = builder.nexus_file
636
638
 
637
639
  assert_equal "10", foo.vars[:nchar]
638
640
  assert_equal "10", foo.vars[:ntaxa]
@@ -648,7 +650,7 @@ class Test_Parser < Test::Unit::TestCase
648
650
  lexer = NexusParser::Lexer.new(input)
649
651
 
650
652
  NexusParser::Parser.new(lexer,builder).parse_format
651
- foo = builder.nexus_file
653
+ foo = builder.nexus_file
652
654
 
653
655
  assert_equal "STANDARD", foo.vars[:datatype]
654
656
  assert_equal "-", foo.vars[:gap]
@@ -657,20 +659,34 @@ class Test_Parser < Test::Unit::TestCase
657
659
  # add test that nothing is left in lexer
658
660
  end
659
661
 
662
+ def test_parse_format_respect_case
663
+ input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
664
+ builder = NexusParser::Builder.new
665
+ lexer = NexusParser::Lexer.new(input)
666
+
667
+ NexusParser::Parser.new(lexer,builder).parse_format
668
+ foo = builder.nexus_file
669
+
670
+ assert_equal "STANDARD", foo.vars[:datatype]
671
+ assert_equal "-", foo.vars[:gap]
672
+ assert_equal "?", foo.vars[:missing]
673
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
674
+ end
675
+
660
676
  def test_parse_chr_state_labels
661
677
  input =" CHARSTATELABELS
662
678
  1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
663
679
  MATRIX
664
680
  fooo 01 more stuff here that should not be hit"
665
-
681
+
666
682
  builder = NexusParser::Builder.new
667
683
  lexer = NexusParser::Lexer.new(input)
668
-
684
+
669
685
  (0..9).each{builder.stub_chr()}
670
-
686
+
671
687
  NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
672
688
 
673
- foo = builder.nexus_file
689
+ foo = builder.nexus_file
674
690
  assert_equal 10, foo.characters.size
675
691
  assert_equal "Tibia_II", foo.characters[0].name
676
692
  assert_equal "norm", foo.characters[0].states["0"].name
@@ -692,7 +708,7 @@ class Test_Parser < Test::Unit::TestCase
692
708
  assert_equal "dorsal", foo.characters[4].states["0"].name
693
709
  assert_equal "mesal", foo.characters[4].states["1"].name
694
710
  assert_equal "lateral", foo.characters[4].states["2"].name
695
-
711
+
696
712
  assert_equal "Paracymbium", foo.characters[5].name
697
713
  assert_equal "abs", foo.characters[5].states["0"].name
698
714
  assert_equal "pres", foo.characters[5].states["1"].name
@@ -717,15 +733,15 @@ class Test_Parser < Test::Unit::TestCase
717
733
  29 'Metatarsal trichobothria (CodAra.29)' / 37623 '>2', 30 'Spinneret cuticle (CodAra.30)' / annulate ridged squamate;
718
734
  Matrix
719
735
  fooo 01 more stuff here that should not be hit"
720
-
736
+
721
737
  builder = NexusParser::Builder.new
722
738
  lexer = NexusParser::Lexer.new(input)
723
739
 
724
- (0..29).each{builder.stub_chr()}
725
-
740
+ (0..29).each{builder.stub_chr()}
741
+
726
742
  NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
727
743
 
728
- foo = builder.nexus_file
744
+ foo = builder.nexus_file
729
745
 
730
746
  assert_equal "Metatarsal trichobothria (CodAra.29)", foo.characters[28].name
731
747
  assert_equal "37623", foo.characters[28].states["0"].name
@@ -739,19 +755,19 @@ class Test_Parser < Test::Unit::TestCase
739
755
  end
740
756
 
741
757
  def DONT_test_parse_really_long_string_of_chr_state_labels
742
- input =" CHARSTATELABELS
743
- 1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
758
+ input =" CHARSTATELABELS
759
+ 1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
744
760
  MATRIX
745
761
  fooo 01 more stuff here that should not be hit"
746
-
762
+
747
763
  builder = NexusParser::Builder.new
748
764
  lexer = NexusParser::Lexer.new(input)
749
-
765
+
750
766
  (0..147).each{builder.stub_chr()}
751
-
767
+
752
768
  NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
753
769
 
754
- foo = builder.nexus_file
770
+ foo = builder.nexus_file
755
771
  assert_equal 10, foo.characters.size
756
772
  assert_equal "Tibia_II", foo.characters[0].name
757
773
  assert_equal "norm", foo.characters[0].states["0"].name
@@ -773,7 +789,7 @@ class Test_Parser < Test::Unit::TestCase
773
789
  assert_equal "dorsal", foo.characters[4].states["0"].name
774
790
  assert_equal "mesal", foo.characters[4].states["1"].name
775
791
  assert_equal "lateral", foo.characters[4].states["2"].name
776
-
792
+
777
793
  assert_equal "Paracymbium", foo.characters[5].name
778
794
  assert_equal "abs", foo.characters[5].states["0"].name
779
795
  assert_equal "pres", foo.characters[5].states["1"].name
@@ -791,12 +807,12 @@ class Test_Parser < Test::Unit::TestCase
791
807
  assert_equal "Median_apophysis", foo.characters[9].name
792
808
  assert_equal "pres", foo.characters[9].states["0"].name
793
809
  assert_equal "abs", foo.characters[9].states["1"].name
794
- end
810
+ end
795
811
 
796
812
 
797
813
 
798
814
  def test_parse_notes_blk
799
- input ="
815
+ input ="
800
816
  TEXT TAXA = 'Scharff&Coddington_1997_Araneidae' TAXON = 2 TEXT = 'This is a footnote to taxon 2, Uloborus';
801
817
 
802
818
  TEXT TAXON = 4 CHARACTER = 8 TEXT = This_is_a_footnote_to_a_cell.;
@@ -813,35 +829,35 @@ class Test_Parser < Test::Unit::TestCase
813
829
 
814
830
  AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ;
815
831
 
816
- AN T = 7 C = 10 A = 0 DC = 2008.4.20.17.25.11 DM = 2008.4.20.17.26.1 ID = 01196db9ebd25 I = _ TF = (CM 'this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -') ;
817
-
832
+ AN T = 7 C = 10 A = 0 DC = 2008.4.20.17.25.11 DM = 2008.4.20.17.26.1 ID = 01196db9ebd25 I = _ TF = (CM 'this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -') ;
833
+
818
834
  AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is ANOTHER annotation that haa a hard return in it^n^n^n^nSo there!') ;
819
835
 
820
836
  END; Don't parse this bit, eh?"
821
-
837
+
822
838
  # note the second last note note embedds parens in the value
823
-
839
+
824
840
  builder = NexusParser::Builder.new
825
841
  lexer = NexusParser::Lexer.new(input)
826
-
842
+
827
843
  # stubs
828
844
  (0..9).each{builder.stub_chr()}
829
845
  (0..9).each{builder.stub_taxon()}
830
846
  builder.nexus_file.codings[3] = []
831
847
  builder.nexus_file.codings[3][7] = NexusParser::NexusParser::Coding.new()
832
- builder.nexus_file.codings[8] = []
848
+ builder.nexus_file.codings[8] = []
833
849
  builder.nexus_file.codings[8][2] = NexusParser::NexusParser::Coding.new()
834
850
  builder.nexus_file.codings[1] = []
835
851
  builder.nexus_file.codings[1][5] = NexusParser::NexusParser::Coding.new()
836
- builder.nexus_file.codings[6] = []
852
+ builder.nexus_file.codings[6] = []
837
853
  builder.nexus_file.codings[6][9] = NexusParser::NexusParser::Coding.new()
838
- builder.nexus_file.codings[3] = []
854
+ builder.nexus_file.codings[3] = []
839
855
  builder.nexus_file.codings[3][7] = NexusParser::NexusParser::Coding.new()
840
856
 
841
857
  NexusParser::Parser.new(lexer,builder).parse_notes_blk
842
858
 
843
- foo = builder.nexus_file
844
-
859
+ foo = builder.nexus_file
860
+
845
861
  # make sure stubs are setup
846
862
  assert_equal 10, foo.characters.size
847
863
  assert_equal 10, foo.taxa.size
@@ -849,7 +865,7 @@ class Test_Parser < Test::Unit::TestCase
849
865
  assert_equal 1, foo.taxa[1].notes.size
850
866
  assert_equal 1, foo.codings[3][7].notes.size
851
867
  assert_equal 'This_is_a_footnote_to_a_cell.', foo.codings[3][7].notes[0].note
852
-
868
+
853
869
  assert_equal 1, foo.characters[9].notes.size
854
870
  assert_equal 'This_is_footnote_to_char_10', foo.characters[9].notes[0].note
855
871
 
@@ -863,7 +879,7 @@ class Test_Parser < Test::Unit::TestCase
863
879
  assert_equal 2, foo.codings[1][5].notes.size # TWO!!
864
880
  assert_equal 1, foo.codings[3][7].notes.size
865
881
 
866
-
882
+
867
883
  assert_equal "This_is_a_footnote_to_a_cell.", foo.codings[3][7].notes[0].note
868
884
 
869
885
  assert_equal "This is an annotation to chr 3, taxa 9, coded ?", foo.codings[8][2].notes[0].note
@@ -885,9 +901,9 @@ class Test_Parser < Test::Unit::TestCase
885
901
  TEXT CHARACTER = 8 TEXT = A62.008;
886
902
  end;
887
903
  "
888
-
904
+
889
905
  # note the second last note note embeds parens in the value
890
-
906
+
891
907
  builder = NexusParser::Builder.new
892
908
  lexer = NexusParser::Lexer.new(input)
893
909
  # stubs
@@ -895,11 +911,11 @@ class Test_Parser < Test::Unit::TestCase
895
911
 
896
912
  NexusParser::Parser.new(lexer,builder).parse_notes_blk
897
913
 
898
- foo = builder.nexus_file
899
-
914
+ foo = builder.nexus_file
915
+
900
916
  # make sure stubs are setup
901
917
  assert_equal 10, foo.characters.size
902
-
918
+
903
919
  assert_equal 'A62.001', foo.characters[0].notes[0].note
904
920
  assert_equal 'A62.002', foo.characters[1].notes[0].note
905
921
  assert_equal 'A62.003', foo.characters[2].notes[0].note
@@ -926,18 +942,16 @@ class Test_Parser < Test::Unit::TestCase
926
942
 
927
943
  def DONT_test_misc
928
944
  nf = File.read('foo.nex') # MX_test_01.nex
929
- foo = parse_nexus_file(nf)
945
+ foo = parse_nexus_file(nf)
930
946
  assert true, foo
931
947
  end
932
948
 
933
- def test_misc
934
-
949
+ def DONT_test_misc2
950
+ # omit("test file doesn't currently exist")
935
951
  assert nf = File.read(File.expand_path(File.join(File.dirname(__FILE__), '../test/Aptostichus.nex')) )
936
- foo = parse_nexus_file(nf)
952
+ foo = parse_nexus_file(nf)
937
953
  assert true, foo
938
954
  end
939
955
 
940
-
941
-
942
956
  end
943
957