nexus_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,937 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'ruby-debug'
4
+
5
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/nexus_file'))
6
+
7
# Smoke test: proves the test harness itself loads and runs.
class NexusParserTest < Test::Unit::TestCase
  # Trivially true assertion; fails only if the framework is broken.
  def test_truth
    assert(true)
  end
end
12
+
13
# Checks the initial state of a NexusFile produced by a fresh Builder.
class Test_NexusFile_Builder < Test::Unit::TestCase
  # A new builder exposes a nexus file whose taxa, characters, codings
  # and sets collections all start out empty.
  def test_builder
    builder = NexusFile::Builder.new
    nexus_file = builder.nexus_file
    assert nexus_file

    [:taxa, :characters, :codings, :sets].each do |collection|
      assert_equal [], nexus_file.send(collection)
    end
  end
end
23
+
24
+
25
# Sanity check for the kind of regular expression used to spot block headers.
class Test_Regex < Test::Unit::TestCase
  # "Begin taxa;" must be found, case-insensitively, even when it is
  # preceded by unrelated text on an earlier line.
  def test_begin_taxa
    sample = " aslkfja\n Begin taxa; BLorf end; "
    @regexp = /\s*(Begin\s*taxa\s*;)\s*/i
    assert_match(@regexp, sample)
  end
end
33
+
34
+
35
+ class Test_Lexer < Test::Unit::TestCase
36
# Walks the lexer token by token over several small inputs: block headers,
# identifiers, numbers, parenthesised groups, matrix row vectors and the
# "#nexus" start marker.
def test_lexer
  # Bracketed text followed by a taxa block.
  lexer = NexusFile::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
  assert lexer.pop(NexusFile::Tokens::LBracket)
  assert id = lexer.pop(NexusFile::Tokens::ID)
  assert_equal("foo", id.value) # expected value goes first so failure messages read correctly
  assert lexer.pop(NexusFile::Tokens::RBracket)
  assert lexer.pop(NexusFile::Tokens::BeginBlk)
  assert lexer.pop(NexusFile::Tokens::TaxaBlk)
  assert foo = lexer.pop(NexusFile::Tokens::ID)
  assert_equal("BLORF", foo.value) # truncating whitespace
  assert lexer.pop(NexusFile::Tokens::BlkEnd)

  # Authors block; the brackets and parens after it are popped next.
  lexer2 = NexusFile::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
  assert lexer2.pop(NexusFile::Tokens::LBracket)
  assert id = lexer2.pop(NexusFile::Tokens::ID)
  assert_equal("foo", id.value)
  assert lexer2.pop(NexusFile::Tokens::RBracket)
  assert lexer2.pop(NexusFile::Tokens::BeginBlk)
  assert lexer2.pop(NexusFile::Tokens::AuthorsBlk)
  assert lexer2.pop(NexusFile::Tokens::LBracket)
  assert lexer2.pop(NexusFile::Tokens::RBracket)
  assert lexer2.pop(NexusFile::Tokens::LParen)
  assert lexer2.pop(NexusFile::Tokens::RParen)

  # Characters block with an identifier body.
  lexer3 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
  assert lexer3.pop(NexusFile::Tokens::LBracket)
  assert id = lexer3.pop(NexusFile::Tokens::ID)
  assert_equal("foo", id.value)
  assert lexer3.pop(NexusFile::Tokens::RBracket)
  assert lexer3.pop(NexusFile::Tokens::BeginBlk)
  assert lexer3.pop(NexusFile::Tokens::ChrsBlk)
  assert foo = lexer3.pop(NexusFile::Tokens::ID)
  assert_equal("BLORF", foo.value)
  assert lexer3.pop(NexusFile::Tokens::BlkEnd)

  # Characters block with a numeric body.
  lexer4 = NexusFile::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
  assert lexer4.pop(NexusFile::Tokens::BeginBlk)
  assert lexer4.pop(NexusFile::Tokens::ChrsBlk)
  assert foo = lexer4.pop(NexusFile::Tokens::Number)
  assert_equal(123123123, foo.value)
  assert lexer4.pop(NexusFile::Tokens::BlkEnd)

  # Comma-separated numbers inside parentheses.
  lexer5 = NexusFile::Lexer.new("(0,1)")
  assert lexer5.pop(NexusFile::Tokens::LParen)
  assert foo = lexer5.pop(NexusFile::Tokens::Number)
  assert_equal(0, foo.value)
  assert lexer5.pop(NexusFile::Tokens::Comma)
  assert foo = lexer5.pop(NexusFile::Tokens::Number)
  assert_equal(1, foo.value)
  assert lexer5.pop(NexusFile::Tokens::RParen)

  # Row vectors: grouped states come back as nested arrays.
  lexer6 = NexusFile::Lexer.new(" 210(0,1)10A1\n")
  assert foo = lexer6.pop(NexusFile::Tokens::RowVec)
  assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)

  lexer6a = NexusFile::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
  assert foo = lexer6a.pop(NexusFile::Tokens::RowVec)
  assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)

  lexer6b = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
  assert foo = lexer6b.pop(NexusFile::Tokens::RowVec)
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)

  lexer6c = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # * dos line ending
  assert foo = lexer6c.pop(NexusFile::Tokens::RowVec)
  assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)

  # Everything before the "#nexus" marker is skipped.
  lexer7 = NexusFile::Lexer.new("read nothing till Nexus, not that nexus 13243 Block [] ();, this one: #nexus FOO")
  assert foo = lexer7.pop(NexusFile::Tokens::NexusStart)
  assert_equal('#nexus', foo.value)

  ## we strip comments before parsing now
  # lexer8 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
  # assert foo = lexer8.pop(NexusFile::Tokens::NexusComment)
  # assert_equal "foo", foo.value

  # assert lexer.pop(NexusFile::Tokens::Colon)
  # assert num = lexer.pop(NexusFile::Tokens::Number)
  # assert_equal(num.value, 0.0)
  # assert lexer.pop(NexusFile::Tokens::Comma)
  # assert lexer.pop(NexusFile::Tokens::SemiColon)
end
121
+
122
# A matrix row mixing single states, "?" / "-" symbols and grouped
# (polymorphic) states lexes into a flat array with nested arrays for groups.
def test_row_vec
  expected = ["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"]
  row_lexer = NexusFile::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
  token = row_lexer.pop(NexusFile::Tokens::RowVec)
  assert token
  assert_equal(expected, token.value)
end
127
+
128
# Pops each punctuation token in turn from a dense string, with one quoted
# label embedded in the middle.
def test_punctuation
  lexer = NexusFile::Lexer.new(',/=](\'NOT23\'[);,')
  assert lexer.peek(NexusFile::Tokens::Comma)

  # Punctuation expected before the quoted label, in order.
  [
    NexusFile::Tokens::Comma,
    NexusFile::Tokens::BckSlash,
    NexusFile::Tokens::Equals,
    NexusFile::Tokens::RBracket,
    NexusFile::Tokens::LParen
  ].each { |token_class| assert lexer.pop(token_class) }

  label = lexer.pop(NexusFile::Tokens::Label)
  assert label
  assert_equal "NOT23", label.value

  # Punctuation expected after the label, in order.
  [
    NexusFile::Tokens::LBracket,
    NexusFile::Tokens::RParen,
    NexusFile::Tokens::SemiColon,
    NexusFile::Tokens::Comma
  ].each { |token_class| assert lexer.pop(token_class) }
end
144
+
145
# The Taxlabels token's value is the keyword plus its trailing space.
def test_tax_labels
  source = "Taxlabels 'foo' bar blorf \"stuff things\" stuff 'and foo';"
  token = NexusFile::Lexer.new(source).pop(NexusFile::Tokens::Taxlabels)
  assert token
  assert_equal("Taxlabels ", token.value)
end
150
+
151
# EndBlk is recognised when only whitespace precedes "End;", and is not
# reported by peek when other content comes first.
def test_EndBlk
  [" \n\n End ;", "\n\nEnd;"].each do |source|
    lexer = NexusFile::Lexer.new(source)
    assert lexer.pop(NexusFile::Tokens::EndBlk)
  end

  ["123123 \n\nEnd;", "this is not an \"end\"\n\nEnd;"].each do |source|
    lexer = NexusFile::Lexer.new(source)
    assert !lexer.peek(NexusFile::Tokens::EndBlk)
  end
end
162
+
163
# A leading semicolon can be both peeked and popped.
def test_semicolon
  lexer = NexusFile::Lexer.new("; Matrix foo")
  semicolon = NexusFile::Tokens::SemiColon
  assert lexer.peek(semicolon)
  assert lexer.pop(semicolon)
end
168
+
169
# Labels may be bare, single-quoted, double-quoted, numeric or
# doubled-quote wrapped; commas and semicolons between them are
# separate tokens.
def test_label
  lexer = NexusFile::Lexer.new(' \'foo\' bar, blorf; "stuff things" stuff \'and foo\' 23434 ""asdf"" \'Foo_And_Stuff\' ')

  label     = NexusFile::Tokens::Label
  comma     = NexusFile::Tokens::Comma
  semicolon = NexusFile::Tokens::SemiColon

  # [token class to pop, expected value (nil = only check that it pops)]
  sequence = [
    [label, "foo"],
    [label, "bar"],
    [comma, nil],
    [label, "blorf"],
    [semicolon, nil],
    [label, "stuff things"],
    [label, "stuff"],
    [label, "and foo"],
    [label, "23434"],
    [label, '"asdf"'],
    [label, 'Foo_And_Stuff']
  ]

  sequence.each do |token_class, expected|
    token = lexer.pop(token_class)
    assert token
    assert_equal expected, token.value unless expected.nil?
  end
end
192
+
193
# Unusual labels: quoted labels containing commas and parentheses, and
# triple-quoted labels that keep their inner pair of quotes.
def test_odd_labels
  lexer = NexusFile::Lexer.new("blorf 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' \"\"\" foo \"\"\" '''rupununi''' '''tanzania''' '''cup-shaped''' bar blorf\n;")

  expected_labels = [
    "blorf",
    "fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)",
    '"" foo ""',
    "''rupununi''",
    "''tanzania''",
    "''cup-shaped''",
    "bar",
    "blorf"
  ]

  expected_labels.each do |expected|
    token = lexer.pop(NexusFile::Tokens::Label)
    assert token
    assert_equal expected, token.value
  end

  # The terminating semicolon sits on the following line.
  assert lexer.pop(NexusFile::Tokens::SemiColon)
end
213
+
214
# The Title token's value is the whole statement, semicolon included.
def test_title
  statement = "TITLE 'Scharff&Coddington_1997_Araneidae';"
  token = NexusFile::Lexer.new(statement).pop(NexusFile::Tokens::Title)
  assert token
  assert_equal "TITLE 'Scharff&Coddington_1997_Araneidae';", token.value
end
219
+
220
+
221
# The Dimensions token's value is the bare keyword, without the
# surrounding whitespace or the value pairs that follow.
def test_dimensions
  lexer = NexusFile::Lexer.new(" DIMENSIONS NCHAR= 10")
  token = lexer.pop(NexusFile::Tokens::Dimensions)
  assert token
  assert_equal "DIMENSIONS", token.value
end
227
+
228
# The Format token's value is the bare keyword, with its original case kept.
def test_format
  lexer = NexusFile::Lexer.new(" format NCHAR= 10")
  token = lexer.pop(NexusFile::Tokens::Format)
  assert token
  assert_equal "format", token.value
end
234
+
235
# Notes-style "TEXT CHARACTER = n TEXT = value;" statements: the leading
# keyword is a Label, and each "key = value" that follows is a ValuePair.
def test_odd_value_pair
  source = " TEXT CHARACTER = 3 TEXT = A62.003;\n\nTEXT CHARACTER = 4 TEXT = A62.004; \n end; "
  lexer = NexusFile::Lexer.new(source)

  label      = NexusFile::Tokens::Label
  value_pair = NexusFile::Tokens::ValuePair

  # First statement: the leading label is popped but its value is not checked.
  assert lexer.pop(label)
  [{:character => "3"}, {:text => "A62.003"}].each do |expected|
    token = lexer.pop(value_pair)
    assert token
    assert_equal expected, token.value
  end

  # Second statement: the leading label's value is checked too.
  token = lexer.pop(label)
  assert token
  assert_equal "TEXT", token.value
  [{:character => "4"}, {:text => "A62.004"}].each do |expected|
    token = lexer.pop(value_pair)
    assert token
    assert_equal expected, token.value
  end
end
256
+
257
+
258
# ValuePair lexing across whitespace variations, quoting styles,
# parenthesised values and consecutive pairs. Keys come back as
# lower-cased symbols; values come back as strings without surrounding
# quotes or padding.
def test_value_pair
  # Pops one ValuePair from the lexer and compares its value hash.
  expect_pair = lambda do |lexer, expected|
    token = lexer.pop(NexusFile::Tokens::ValuePair)
    assert token
    assert_equal expected, token.value
  end

  # Spacing and quoting around "=" do not change the result.
  datatype = {:datatype => "STANDARD"}
  [
    ' DATATYPE=STANDARD ',
    ' DATATYPE = STANDARD ',
    ' DATATYPE ="STANDARD" ',
    'DATATYPE= "STANDARD" '
  ].each do |source|
    expect_pair.call(NexusFile::Lexer.new(source), datatype)
  end

  # Several pairs in a row, each quoted differently.
  input = " NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" ; "
  lexer4 = NexusFile::Lexer.new(input)
  [
    {:nchar => '10'},
    {:ntaxa => '10'},
    {:nfoo => '999'},
    {:nbar => 'a b c'}
  ].each { |expected| expect_pair.call(lexer4, expected) }

  # Single-pair inputs, including values followed by a semicolon and
  # values wrapped in parentheses.
  [
    [' symbols= " a c b d 1 " ', {:symbols => 'a c b d 1'}],
    [' missing = - ', {:missing => '-'}],
    ["ntaxa=1;\n", {:ntaxa => '1'}],
    ["ntaxa =1;\n", {:ntaxa => '1'}],
    [" ntaxa = 1 ;\n", {:ntaxa => '1'}],
    [" TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ",
     {:tf => "(CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!')"}],
    [" TF = (CM 'This is an value pair that has (parens) within the value, twice! ()') ; some stuff left here ",
     {:tf => "(CM 'This is an value pair that has (parens) within the value, twice! ()')"}]
  ].each do |source, expected|
    expect_pair.call(NexusFile::Lexer.new(source), expected)
  end

  # Two pairs back to back: peek must report a ValuePair, not a SemiColon.
  lexer11 = NexusFile::Lexer.new("CHARACTER = 1 TEXT = A62.001;")
  assert_equal true, !lexer11.peek(NexusFile::Tokens::SemiColon)
  assert_equal true, lexer11.peek(NexusFile::Tokens::ValuePair)
  expect_pair.call(lexer11, {:character => "1"})
  assert lexer11.pop(NexusFile::Tokens::ValuePair)
end
336
+
337
# A Mesquite IDS statement followed by a BLOCKID statement lexes as one
# MesquiteIDs token and then one MesquiteBlockID token.
def test_MesquiteIDs
  ids_line = 'IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;'
  block_id_line = 'BLOCKID JC1191fcddc0c0;'

  lexer = NexusFile::Lexer.new([ids_line, block_id_line].join("\n"))
  [NexusFile::Tokens::MesquiteIDs, NexusFile::Tokens::MesquiteBlockID].each do |token_class|
    assert lexer.pop(token_class)
  end
end
343
+
344
# A TREES block is consumed as one token whose value runs from "TREES"
# through its "END;", leaving the following LABELS block for later pops.
def test_TreesBlk
  source = [
    "BEGIN TREES;",
    "Title Imported_trees;",
    "LINK Taxa = 'Scharff&Coddington_1997_Araneidae';",
    "TRANSLATE",
    "1 Dictyna,",
    "2 Uloborus,",
    "3 Deinopis,",
    "4 Nephila&Herennia,",
    "5 'Nephilengys_cruentata',",
    "6 Meta,",
    "7 Leucauge_venusta,",
    "8 Pachygnatha,",
    "9 'Theridiosoma_01',",
    "10 Tetragnatha;",
    "TREE 'Imported tree 1+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));",
    "TREE 'Imported tree 2+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));",
    "TREE 'Imported tree 3+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));",
    "TREE 'Imported tree 4+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));",
    "TREE 'Imported tree 5+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));",
    "TREE 'Imported tree 6+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));",
    "TREE 'Imported tree 7+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));",
    "TREE 'Imported tree 8+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));",
    "",
    "END;",
    "",
    "",
    "BEGIN LABELS;",
    "CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;",
    "CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;",
    "CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;",
    "CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;",
    "",
    "",
    "END;"
  ].join("\n")
  lexer = NexusFile::Lexer.new(source)

  assert lexer.pop(NexusFile::Tokens::BeginBlk)
  trees = lexer.pop(NexusFile::Tokens::TreesBlk)
  assert trees
  assert_equal 'TREES', trees.value.slice(0,5)
  assert_equal 'END;', trees.value.slice(-4,4)

  # The LABELS block that follows must still be available.
  assert lexer.pop(NexusFile::Tokens::BeginBlk)
  assert lexer.pop(NexusFile::Tokens::LabelsBlk)
end
388
+
389
# After the BEGIN keyword, a "NOTES ;" header lexes as a NotesBlk token
# whose value starts with the keyword.
def test_NotesBlk
  input = "BEGIN NOTES ;"
  lexer = NexusFile::Lexer.new(input)
  assert lexer.pop(NexusFile::Tokens::BeginBlk)
  assert foo = lexer.pop(NexusFile::Tokens::NotesBlk)
  # Was `assert "NOTES", foo.value`, which treats foo.value as the failure
  # message and always passes. Check the leading keyword instead, in the
  # same form as the Trees/Labels/Sets block tests.
  assert_equal "NOTES", foo.value.slice(0, 5)
end
396
+
397
# A LABELS block is consumed as one token whose value runs from "LABELS"
# through its "END;", stopping before the next block.
def test_LabelsBlk
  source = [
    "",
    "LABELS;",
    "CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;",
    "CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;",
    "CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;",
    "CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;",
    "",
    "",
    "END;",
    "",
    "BEGIN some other block;"
  ].join("\n")

  labels = NexusFile::Lexer.new(source).pop(NexusFile::Tokens::LabelsBlk)
  assert labels
  assert_equal 'LABELS', labels.value.slice(0,6)
  assert_equal 'END;', labels.value.slice(-4,4)
end
414
+
415
# A SETS block is consumed as one token whose value runs from "SETS"
# through its "END;", stopping before the next block.
def test_SetsBlk
  source = [
    "",
    "SETS;",
    "CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;",
    "",
    "END;",
    "BEGIN some other block;"
  ].join("\n")

  sets = NexusFile::Lexer.new(source).pop(NexusFile::Tokens::SetsBlk)
  assert sets
  assert_equal 'SETS', sets.value.slice(0,4)
  assert_equal 'END;', sets.value.slice(-4,4)
end
427
+
428
+
429
+
430
# Peeking at input that cannot be tokenised raises a ParseError.
def test_lexer_errors
  unlexable = NexusFile::Lexer.new("*&")
  assert_raise(NexusFile::ParseError) do
    unlexable.peek(NexusFile::Tokens::ID)
  end
end
434
+ end
435
+
436
+
437
+ class Test_Parser < Test::Unit::TestCase
438
# Loads the NEXUS fixture text into @nf before each test.
def setup
  # a Mesquite 2.n or higher file
  # The path is resolved relative to this test file rather than the current
  # working directory, so the suite can be run from any directory.
  fixture = File.expand_path(File.join(File.dirname(__FILE__), 'MX_test_03.nex')) # MX_test_01.nex
  @nf = File.read(fixture)
end
442
+
443
# Drops the fixture text after each test.
def teardown
  instance_variable_set(:@nf, nil)
end
446
+
447
# Input that is not NEXUS must either yield a falsy result or raise a
# ParseError; both outcomes pass.
def test_that_file_might_be_nexus
  result = parse_nexus_file("#Nexblux Begin Natrix end;")
  assert !result
rescue NexusFile::ParseError
  assert true
end
454
+
455
# Parsing the fixture completes and returns an object.
def test_parse_initializes
  # Previously the result went into an unused local and nothing was
  # asserted; state the expectation explicitly.
  assert_not_nil parse_nexus_file(@nf)
end
458
+
459
# End-to-end parse of the fixture: this is the major loop, so all parts
# should exist.
def test_parse_file
  parsed = parse_nexus_file(@nf)

  [parsed.taxa, parsed.characters, parsed.codings].each do |collection|
    assert_equal 10, collection.size
  end

  assert_equal 1, parsed.taxa[1].notes.size # asserts that notes are parsing

  first_chr_states = parsed.characters[0].states
  assert_equal "norm", first_chr_states["0"].name
  assert_equal "modified", first_chr_states["1"].name
end
470
+
471
# Placeholder: no assertions for the authors block yet.
def test_parse_authors_blk; end
473
+
474
# Parses a TAXA block body that includes Mesquite IDS and BLOCKID
# statements, then checks the taxon names.
def test_taxa_block
  # we've popped off the header already
  input = [
    "TITLE 'Scharff&Coddington_1997_Araneidae';",
    "DIMENSIONS NTAX=10;",
    "TAXLABELS",
    "Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha",
    ";",
    "IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;",
    "BLOCKID JC1191fcddc0c4;",
    "END;"
  ].join("\n")

  builder = NexusFile::Builder.new
  NexusFile::Parser.new(NexusFile::Lexer.new(input), builder).parse_taxa_blk
  taxa = builder.nexus_file.taxa

  assert_equal 10, taxa.size
  [
    [0, "Dictyna"],
    [4, "Nephilengys_cruentata"],
    [8, "Theridiosoma_01"],
    [9, "Tetragnatha"]
  ].each do |index, name|
    assert_equal name, taxa[index].name
  end
end
497
+
498
# Same TAXA block body without the Mesquite IDS and BLOCKID statements.
def test_taxa_block_without_IDS
  # we've popped off the header already
  input = [
    "TITLE 'Scharff&Coddington_1997_Araneidae';",
    "DIMENSIONS NTAX=10;",
    "TAXLABELS",
    "Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha",
    ";",
    "END;"
  ].join("\n")

  builder = NexusFile::Builder.new
  NexusFile::Parser.new(NexusFile::Lexer.new(input), builder).parse_taxa_blk
  taxa = builder.nexus_file.taxa

  assert_equal 10, taxa.size
  [
    [0, "Dictyna"],
    [4, "Nephilengys_cruentata"],
    [8, "Theridiosoma_01"],
    [9, "Tetragnatha"]
  ].each do |index, name|
    assert_equal name, taxa[index].name
  end
end
519
+
520
+
521
+
522
# Parses a full CHARACTERS block body (title, dimensions, format, state
# labels, matrix, Mesquite IDs) and checks characters, states and codings.
def test_parse_characters_blk
  input = [
    "",
    "TITLE 'Scharff&Coddington_1997_Araneidae';",
    "DIMENSIONS NCHAR=10;",
    "FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";",
    "CHARSTATELABELS",
    "1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;",
    "MATRIX",
    "Dictyna 0?00201001",
    "Uloborus 0?11000000",
    "Deinopis 0?01002???",
    "Nephila&Herennia 0?21010011",
    "'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11",
    "Meta 0?01A10011",
    "Leucauge_venusta ???--?-??-",
    "Pachygnatha 0?210(0 1)0011",
    "'Theridiosoma_01' ??????????",
    "Tetragnatha 0?01011011",
    "",
    ";",
    "IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;",
    "BLOCKID JC1191fcddc0c0;",
    "",
    "END;"
  ].join("\n")

  builder = NexusFile::Builder.new
  @lexer = NexusFile::Lexer.new(input)

  # add the taxa, assumes we have them for comparison purposes, though we (shouldn't) ultimately need them
  # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]

  # stub the taxa, they would otherwise get added in dimensions or taxa block
  10.times { builder.stub_taxon }

  NexusFile::Parser.new(@lexer, builder).parse_characters_blk
  nexus_file = builder.nexus_file
  characters = nexus_file.characters

  assert_equal 10, characters.size
  assert_equal "Tibia_II", characters[0].name
  assert_equal "TII_macrosetae", characters[1].name

  assert_equal "norm", characters[0].states["0"].name
  assert_equal "modified", characters[0].states["1"].name

  # ?!!?
  # foo.characters[0].states["1"].name
  last_states = characters[9].states
  assert_equal ["", "abs", "pres"], last_states.keys.map { |key| last_states[key].name }.sort

  assert_equal ["0","1"], nexus_file.codings[7][5].states
  assert_equal ["?"], nexus_file.codings[9][1].states
  assert_equal ["-", "0", "1", "2", "A"], characters[4].state_labels
end
576
+
577
# Same CHARACTERS block body with the TITLE, IDS and BLOCKID statements
# left out.
def test_characters_block_without_IDs_or_title
  input = [
    "",
    "DIMENSIONS NCHAR=10;",
    "FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";",
    "CHARSTATELABELS",
    "1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;",
    "MATRIX",
    "Dictyna 0?00201001",
    "Uloborus 0?11000000",
    "Deinopis 0?01002???",
    "Nephila&Herennia 0?21010011",
    "'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11",
    "Meta 0?01A10011",
    "Leucauge_venusta ???--?-??-",
    "Pachygnatha 0?210(0 1)0011",
    "'Theridiosoma_01' ??????????",
    "Tetragnatha 0?01011011",
    "",
    ";",
    "END;"
  ].join("\n")

  builder = NexusFile::Builder.new
  @lexer = NexusFile::Lexer.new(input)

  # add the taxa, assumes we have them for comparison purposes, though we (shouldn't) ultimately need them
  # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]

  # stub the taxa, they would otherwise get added in dimensions or taxa block
  10.times { builder.stub_taxon }

  NexusFile::Parser.new(@lexer, builder).parse_characters_blk
  nexus_file = builder.nexus_file
  characters = nexus_file.characters

  assert_equal 10, characters.size
  assert_equal "Tibia_II", characters[0].name
  assert_equal "TII_macrosetae", characters[1].name
  assert_equal "norm", characters[0].states["0"].name
  assert_equal "modified", characters[0].states["1"].name

  last_states = characters[9].states
  assert_equal ["", "abs", "pres"], last_states.keys.map { |key| last_states[key].name }.sort

  assert_equal ["0","1"], nexus_file.codings[7][5].states
  assert_equal ["?"], nexus_file.codings[9][1].states
  assert_equal ["-", "0", "1", "2", "A"], characters[4].state_labels
end
620
+
621
# The fixture file yields ten characters.
def test_characters_block_from_file
  foo = parse_nexus_file(@nf)
  # Was `assert 10, foo.characters.size`, which treats the size as the
  # failure message and always passes; compare the values instead.
  assert_equal 10, foo.characters.size
end
625
+
626
# The fixture file yields one hundred coded cells in total.
def test_codings
  foo = parse_nexus_file(@nf)
  # Was `assert 100, foo.codings.size`, which always passes. codings is
  # indexed as codings[row][column], so count the cells across all rows.
  cell_count = foo.codings.inject(0) { |total, row| total + row.size }
  assert_equal 100, cell_count # two multistates count in single cells
end
630
+
631
# Every key/value pair on a DIMENSIONS statement ends up in the nexus
# file's vars, keyed by lower-cased symbol, with string values.
def test_parse_dimensions
  input = " DIMENSIONS NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" blorf=2; "
  builder = NexusFile::Builder.new
  NexusFile::Parser.new(NexusFile::Lexer.new(input), builder).parse_dimensions
  vars = builder.nexus_file.vars

  [
    [:nchar, "10"],
    [:ntaxa, "10"],
    [:nfoo, "999"],
    [:nbar, 'a b c'],
    [:blorf, '2']
  ].each do |key, expected|
    assert_equal expected, vars[key]
  end
  # add test that nothing is left in lexer
end
646
+
647
# Every key/value pair on a FORMAT statement ends up in the nexus file's
# vars; the quoted SYMBOLS value loses its quotes and leading padding.
def test_parse_format
  input = "FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
  builder = NexusFile::Builder.new
  NexusFile::Parser.new(NexusFile::Lexer.new(input), builder).parse_format
  vars = builder.nexus_file.vars

  [
    [:datatype, "STANDARD"],
    [:gap, "-"],
    [:missing, "?"],
    [:symbols, '0 1 2 3 4 5 6 7 8 9 A']
  ].each do |key, expected|
    assert_equal expected, vars[key]
  end
  # add test that nothing is left in lexer
end
661
+
662
# Parses a CHARSTATELABELS statement and checks character names and state
# names, including a skipped character number (4) and a character with
# states but no name (8).
def test_parse_chr_state_labels
  input = [
    " CHARSTATELABELS",
    "1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;",
    "MATRIX",
    "fooo 01 more stuff here that should not be hit"
  ].join("\n")

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(input)

  10.times { builder.stub_chr }

  NexusFile::Parser.new(lexer, builder).parse_chr_state_labels

  characters = builder.nexus_file.characters
  assert_equal 10, characters.size

  # [character index, expected name, expected state names keyed "0", "1", ...]
  expectations = [
    [0, "Tibia_II", ["norm", "modified"]],
    [1, "TII_macrosetae", ["= TI", "stronger"]],
    [2, "Femoral_tuber", ["abs", "pres", "m-setae"]],
    [4, "Cymbium", ["dorsal", "mesal", "lateral"]],
    [5, "Paracymbium", ["abs", "pres"]],
    [6, "Globular_tegulum", ["abs", "pres"]],
    [7, "Undefined", ["entire", "w_lobe"]],
    # ...
    [9, "Median_apophysis", ["pres", "abs"]]
  ]

  expectations.each do |index, name, state_names|
    character = characters[index]
    assert_equal name, character.name
    state_names.each_with_index do |state_name, state_index|
      assert_equal state_name, character.states[state_index.to_s].name
    end
  end

  # Character 4 is absent from the labels, so it keeps its stub name and has no states.
  assert_equal "Undefined", characters[3].name
  assert_equal 0, characters[3].states.keys.size
end
716
+
717
# State labels with quoted names containing spaces and parentheses, a
# purely numeric state name and a quoted ">2" state name.
def test_strange_chr_state_labels
  input = [
    " CHARSTATELABELS",
    "29 'Metatarsal trichobothria (CodAra.29)' / 37623 '>2', 30 'Spinneret cuticle (CodAra.30)' / annulate ridged squamate;",
    "Matrix",
    "fooo 01 more stuff here that should not be hit"
  ].join("\n")

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(input)

  30.times { builder.stub_chr }

  NexusFile::Parser.new(lexer, builder).parse_chr_state_labels

  characters = builder.nexus_file.characters

  # [character index, expected name, expected state names keyed "0", "1", ...]
  [
    [28, "Metatarsal trichobothria (CodAra.29)", ["37623", ">2"]],
    [29, "Spinneret cuticle (CodAra.30)", ["annulate", "ridged", "squamate"]]
  ].each do |index, name, state_names|
    character = characters[index]
    assert_equal name, character.name
    state_names.each_with_index do |state_name, state_index|
      assert_equal state_name, character.states[state_index.to_s].name
    end
  end
end
742
+
743
+ def DONT_test_parse_really_long_string_of_chr_state_labels
744
+ input =" CHARSTATELABELS
745
+ 1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 
60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 
67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 
54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 
48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 
38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 
68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
746
+ MATRIX
747
+ fooo 01 more stuff here that should not be hit"
748
+
749
+ builder = NexusFile::Builder.new
750
+ lexer = NexusFile::Lexer.new(input)
751
+
752
+ (0..147).each{builder.stub_chr()}
753
+
754
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
755
+
756
+ foo = builder.nexus_file
757
+ assert_equal 10, foo.characters.size
758
+ assert_equal "Tibia_II", foo.characters[0].name
759
+ assert_equal "norm", foo.characters[0].states["0"].name
760
+ assert_equal "modified", foo.characters[0].states["1"].name
761
+
762
+ assert_equal "TII_macrosetae", foo.characters[1].name
763
+ assert_equal "= TI", foo.characters[1].states["0"].name
764
+ assert_equal "stronger", foo.characters[1].states["1"].name
765
+
766
+ assert_equal "Femoral_tuber", foo.characters[2].name
767
+ assert_equal "abs", foo.characters[2].states["0"].name
768
+ assert_equal "pres", foo.characters[2].states["1"].name
769
+ assert_equal "m-setae", foo.characters[2].states["2"].name
770
+
771
+ assert_equal "Undefined", foo.characters[3].name
772
+ assert_equal 0, foo.characters[3].states.keys.size
773
+
774
+ assert_equal "Cymbium", foo.characters[4].name
775
+ assert_equal "dorsal", foo.characters[4].states["0"].name
776
+ assert_equal "mesal", foo.characters[4].states["1"].name
777
+ assert_equal "lateral", foo.characters[4].states["2"].name
778
+
779
+ assert_equal "Paracymbium", foo.characters[5].name
780
+ assert_equal "abs", foo.characters[5].states["0"].name
781
+ assert_equal "pres", foo.characters[5].states["1"].name
782
+
783
+ assert_equal "Globular_tegulum", foo.characters[6].name
784
+ assert_equal "abs", foo.characters[6].states["0"].name
785
+ assert_equal "pres", foo.characters[6].states["1"].name
786
+
787
+ assert_equal "Undefined", foo.characters[7].name
788
+ assert_equal "entire", foo.characters[7].states["0"].name
789
+ assert_equal "w_lobe", foo.characters[7].states["1"].name
790
+
791
+ # ...
792
+
793
+ assert_equal "Median_apophysis", foo.characters[9].name
794
+ assert_equal "pres", foo.characters[9].states["0"].name
795
+ assert_equal "abs", foo.characters[9].states["1"].name
796
+ end
797
+
798
+
799
+
800
# Runs Parser#parse_notes_blk over the body of a NOTES block that mixes TEXT
# footnotes (taxon, cell, character and file level) with AN annotations, and
# checks where each note ends up on the built nexus file.
#
# The assertions expect the one-based TAXON/CHARACTER (T/C) numbers in the
# input to land on zero-based indices: e.g. "TAXON = 4 CHARACTER = 8" is
# looked up at codings[3][7].
def test_parse_notes_blk
  input = "
TEXT TAXA = 'Scharff&Coddington_1997_Araneidae' TAXON = 2 TEXT = 'This is a footnote to taxon 2, Uloborus';

TEXT TAXON = 4 CHARACTER = 8 TEXT = This_is_a_footnote_to_a_cell.;

TEXT CHARACTER = 10 TEXT = This_is_footnote_to_char_10;

TEXT FILE TEXT = 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434';

AN T = 4 A = JC DC = 2008.4.13.20.31.19 DM = 2008.4.13.20.31.38 ID = 01194a57d0161 I = _ TF = (CM 'This is an \"annotation\" to taxon 4') ;

AN C = 4 A = JC DC = 2008.4.13.20.31.50 DM = 2008.4.13.20.32.10 ID = 01194a584b9f2 I = _ TF = (CM 'This is an annotation to charcter 4, that has no name.') ;

AN T = 9 C = 3 A = 0 DC = 2008.4.20.17.24.36 DM = 2008.4.20.17.25.4 ID = 01196db963874 I = _ TF = (CM 'This is an annotation to chr 3, taxa 9, coded ?') ;

AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ;

AN T = 7 C = 10 A = 0 DC = 2008.4.20.17.25.11 DM = 2008.4.20.17.26.1 ID = 01196db9ebd25 I = _ TF = (CM 'this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -') ;

AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is ANOTHER annotation that haa a hard return in it^n^n^n^nSo there!') ;

END; Don't parse this bit, eh?"

  # Note: the second-to-last annotation embeds parentheses in its value.

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(input)

  # Stubs: ten characters, ten taxa, and an empty Coding for every
  # [taxon][character] cell (zero-based) that a note above points at.
  10.times { builder.stub_chr }
  10.times { builder.stub_taxon }
  [[3, 7], [8, 2], [1, 5], [6, 9]].each do |taxon_index, chr_index|
    builder.nexus_file.codings[taxon_index] = []
    builder.nexus_file.codings[taxon_index][chr_index] = NexusFile::NexusFile::Coding.new
  end

  NexusFile::Parser.new(lexer, builder).parse_notes_blk

  foo = builder.nexus_file

  # Make sure the stubs are set up.
  assert_equal 10, foo.characters.size
  assert_equal 10, foo.taxa.size

  # TEXT footnotes: taxon, cell, character and file level.
  assert_equal 1, foo.taxa[1].notes.size
  assert_equal 1, foo.codings[3][7].notes.size
  assert_equal 'This_is_a_footnote_to_a_cell.', foo.codings[3][7].notes[0].note

  assert_equal 1, foo.characters[9].notes.size
  assert_equal 'This_is_footnote_to_char_10', foo.characters[9].notes[0].note

  assert_equal 1, foo.notes.size
  assert_equal 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434', foo.notes[0].note

  # AN annotations: taxon, character and cell level.
  assert_equal 1, foo.taxa[3].notes.size
  assert_equal 1, foo.characters[3].notes.size
  assert_equal 1, foo.codings[8][2].notes.size
  assert_equal 1, foo.codings[6][9].notes.size
  assert_equal 2, foo.codings[1][5].notes.size # TWO!! (T = 2 C = 6 is annotated twice)

  assert_equal "This is an annotation to chr 3, taxa 9, coded ?", foo.codings[8][2].notes[0].note
  assert_equal "This is an annotation that haa a hard return in it^n^n^n^nSo there!", foo.codings[1][5].notes[0].note
  assert_equal "this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -", foo.codings[6][9].notes[0].note
  assert_equal "This is ANOTHER annotation that haa a hard return in it^n^n^n^nSo there!", foo.codings[1][5].notes[1].note
end
877
+
878
# Feeds parse_notes_blk eight unquoted, character-level TEXT footnotes
# (CHARACTER = 1..8) and checks that each one is stored as the first note of
# the character at the matching zero-based index.
def test_notes_block_2
  input = "
TEXT CHARACTER = 1 TEXT = A62.001;
TEXT CHARACTER = 2 TEXT = A62.002;
TEXT CHARACTER = 3 TEXT = A62.003;
TEXT CHARACTER = 4 TEXT = A62.004;
TEXT CHARACTER = 5 TEXT = A62.005;
TEXT CHARACTER = 6 TEXT = A62.006;
TEXT CHARACTER = 7 TEXT = A62.007;
TEXT CHARACTER = 8 TEXT = A62.008;
end;
"

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(input)

  # Ten stubbed characters: two more than the input annotates.
  10.times { builder.stub_chr }

  NexusFile::Parser.new(lexer, builder).parse_notes_blk

  foo = builder.nexus_file

  # Make sure the stubs are set up.
  assert_equal 10, foo.characters.size

  expected_notes = %w[A62.001 A62.002 A62.003 A62.004 A62.005 A62.006 A62.007 A62.008]
  expected_notes.each_with_index do |text, index|
    assert_equal text, foo.characters[index].notes[0].note
  end

  # The last annotated character is still a Character and carries one note.
  assert_equal NexusFile::NexusFile::Character, foo.characters[7].class
  assert_equal 1, foo.characters[7].notes.size
end
916
+
917
+
918
# Placeholder named for trees-block parsing: the body is empty, so it makes
# no assertions yet. TODO: add real coverage.
def test_parse_trees_block; end
920
+
921
# Placeholder named for labels-block parsing: the body is empty, so it makes
# no assertions yet. TODO: add real coverage.
def test_parse_labels_block; end
923
+
924
# Placeholder named for sets-block parsing: the body is empty, so it makes
# no assertions yet. TODO: add real coverage.
def test_parse_sets_block; end
926
+
927
# Placeholder named for assumptions-block parsing: the body is empty, so it
# makes no assertions yet. TODO: add real coverage.
def test_parse_assumptions_block; end
929
+
930
# Disabled smoke test: the DONT_ prefix means the name does not start with
# "test", so Test::Unit does not collect it automatically.
#
# Reads 'foo.nex' relative to the current working directory, hands its text
# to parse_nexus_file, and asserts that something truthy came back.
# NOTE(review): the trailing comment suggests MX_test_01.nex was an
# alternative fixture -- confirm which file is intended before enabling.
def DONT_test_misc
  nexus_text = File.read('foo.nex') # MX_test_01.nex
  parsed = parse_nexus_file(nexus_text)
  # Assert on the parse result itself. The previous `assert true, foo`
  # could never fail and passed the parsed object as the failure message.
  assert parsed, 'parse_nexus_file returned nil/false for foo.nex'
end
935
+
936
+ end
937
+