nexus_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,937 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'ruby-debug'
4
+
5
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/nexus_file'))
6
+
7
+ class NexusParserTest < Test::Unit::TestCase
8
+ def test_truth
9
+ assert true
10
+ end
11
+ end
12
+
13
+ class Test_NexusFile_Builder < Test::Unit::TestCase
14
+ def test_builder
15
+ b = NexusFile::Builder.new
16
+ assert foo = b.nexus_file
17
+ assert_equal [], foo.taxa
18
+ assert_equal [], foo.characters
19
+ assert_equal [], foo.codings
20
+ assert_equal [], foo.sets
21
+ end
22
+ end
23
+
24
+
25
+ class Test_Regex < Test::Unit::TestCase
26
+ def test_begin_taxa
27
+ txt = " aslkfja\n Begin taxa; BLorf end; "
28
+ @regexp = Regexp.new(/\s*(Begin\s*taxa\s*;)\s*/i)
29
+ assert txt =~ @regexp
30
+ end
31
+
32
+ end
33
+
34
+
35
+ class Test_Lexer < Test::Unit::TestCase
36
+ def test_lexer
37
+ lexer = NexusFile::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
38
+ assert lexer.pop(NexusFile::Tokens::LBracket)
39
+ assert id = lexer.pop(NexusFile::Tokens::ID)
40
+ assert_equal(id.value, "foo")
41
+ assert lexer.pop(NexusFile::Tokens::RBracket)
42
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
43
+ assert lexer.pop(NexusFile::Tokens::TaxaBlk)
44
+ assert foo = lexer.pop(NexusFile::Tokens::ID)
45
+ assert_equal("BLORF", foo.value) # truncating whitespace
46
+ assert lexer.pop(NexusFile::Tokens::BlkEnd)
47
+
48
+ lexer2 = NexusFile::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
49
+ assert lexer2.pop(NexusFile::Tokens::LBracket)
50
+ assert id = lexer2.pop(NexusFile::Tokens::ID)
51
+ assert_equal(id.value, "foo")
52
+ assert lexer2.pop(NexusFile::Tokens::RBracket)
53
+ assert lexer2.pop(NexusFile::Tokens::BeginBlk)
54
+ assert lexer2.pop(NexusFile::Tokens::AuthorsBlk)
55
+ assert lexer2.pop(NexusFile::Tokens::LBracket)
56
+ assert lexer2.pop(NexusFile::Tokens::RBracket)
57
+ assert lexer2.pop(NexusFile::Tokens::LParen)
58
+ assert lexer2.pop(NexusFile::Tokens::RParen)
59
+
60
+
61
+ lexer3 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
62
+ assert lexer3.pop(NexusFile::Tokens::LBracket)
63
+ assert id = lexer3.pop(NexusFile::Tokens::ID)
64
+ assert_equal(id.value, "foo")
65
+ assert lexer3.pop(NexusFile::Tokens::RBracket)
66
+ assert lexer3.pop(NexusFile::Tokens::BeginBlk)
67
+ assert lexer3.pop(NexusFile::Tokens::ChrsBlk)
68
+ assert foo = lexer3.pop(NexusFile::Tokens::ID)
69
+ assert_equal("BLORF", foo.value)
70
+ assert lexer3.pop(NexusFile::Tokens::BlkEnd)
71
+
72
+ lexer4 = NexusFile::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
73
+ assert lexer4.pop(NexusFile::Tokens::BeginBlk)
74
+ assert lexer4.pop(NexusFile::Tokens::ChrsBlk)
75
+ assert foo = lexer4.pop(NexusFile::Tokens::Number)
76
+ assert_equal(123123123, foo.value)
77
+ assert lexer4.pop(NexusFile::Tokens::BlkEnd)
78
+
79
+ lexer5 = NexusFile::Lexer.new("(0,1)")
80
+ assert lexer5.pop(NexusFile::Tokens::LParen)
81
+ assert foo = lexer5.pop(NexusFile::Tokens::Number)
82
+ assert_equal(0, foo.value)
83
+ assert lexer5.pop(NexusFile::Tokens::Comma)
84
+ assert foo = lexer5.pop(NexusFile::Tokens::Number)
85
+ assert_equal(1, foo.value)
86
+ assert lexer5.pop(NexusFile::Tokens::RParen)
87
+
88
+ lexer6 = NexusFile::Lexer.new(" 210(0,1)10A1\n")
89
+ assert foo = lexer6.pop(NexusFile::Tokens::RowVec)
90
+ assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
91
+
92
+ lexer6a = NexusFile::Lexer.new(" 21a(0 1)0b{3 4 5}(0)(1 a)\n")
93
+ assert foo = lexer6a.pop(NexusFile::Tokens::RowVec)
94
+ assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
95
+
96
+ lexer6b = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0A") # *nix line ending
97
+ assert foo = lexer6b.pop(NexusFile::Tokens::RowVec)
98
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
99
+
100
+ lexer6c = NexusFile::Lexer.new(" 201{0 1}{0 1}0100)\x0D\x0A") # * dos line ending
101
+ assert foo = lexer6c.pop(NexusFile::Tokens::RowVec)
102
+ assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
103
+
104
+
105
+ lexer7 = NexusFile::Lexer.new("read nothing till Nexus, not that nexus 13243 Block [] ();, this one: #nexus FOO")
106
+ assert foo = lexer7.pop(NexusFile::Tokens::NexusStart)
107
+ assert_equal('#nexus', foo.value)
108
+
109
+
110
+ ## we strip comments before parsing now
111
+ # lexer8 = NexusFile::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
112
+ # assert foo = lexer8.pop(NexusFile::Tokens::NexusComment)
113
+ # assert_equal "foo", foo.value
114
+
115
+ # assert lexer.pop(NexusFile::Tokens::Colon)
116
+ # assert num = lexer.pop(NexusFile::Tokens::Number)
117
+ # assert_equal(num.value, 0.0)
118
+ # assert lexer.pop(NexusFile::Tokens::Comma)
119
+ # assert lexer.pop(NexusFile::Tokens::SemiColon)
120
+ end
121
+
122
+ def test_row_vec
123
+ lexer = NexusFile::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
124
+ assert foo = lexer.pop(NexusFile::Tokens::RowVec)
125
+ assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "BD", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
126
+ end
127
+
128
+ def test_punctuation
129
+ lexer = NexusFile::Lexer.new(',/=](\'NOT23\'[);,')
130
+ assert lexer.peek(NexusFile::Tokens::Comma)
131
+ assert lexer.pop(NexusFile::Tokens::Comma)
132
+ assert lexer.pop(NexusFile::Tokens::BckSlash)
133
+ assert lexer.pop(NexusFile::Tokens::Equals)
134
+ assert lexer.pop(NexusFile::Tokens::RBracket)
135
+ assert lexer.pop(NexusFile::Tokens::LParen)
136
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
137
+ assert_equal "NOT23", foo.value
138
+ assert lexer.pop(NexusFile::Tokens::LBracket)
139
+ assert lexer.pop(NexusFile::Tokens::RParen)
140
+ assert lexer.pop(NexusFile::Tokens::SemiColon)
141
+ assert lexer.pop(NexusFile::Tokens::Comma)
142
+
143
+ end
144
+
145
+ def test_tax_labels
146
+ lexer = NexusFile::Lexer.new("Taxlabels 'foo' bar blorf \"stuff things\" stuff 'and foo';")
147
+ assert foo = lexer.pop(NexusFile::Tokens::Taxlabels)
148
+ assert_equal("Taxlabels ", foo.value)
149
+ end
150
+
151
+ def test_EndBlk
152
+ lexer = NexusFile::Lexer.new(" \n\n End ;")
153
+ assert foo = lexer.pop(NexusFile::Tokens::EndBlk)
154
+ lexer = NexusFile::Lexer.new("\n\nEnd;")
155
+ assert foo = lexer.pop(NexusFile::Tokens::EndBlk)
156
+
157
+ lexer = NexusFile::Lexer.new("123123 \n\nEnd;")
158
+ assert !lexer.peek(NexusFile::Tokens::EndBlk)
159
+ lexer = NexusFile::Lexer.new("this is not an \"end\"\n\nEnd;")
160
+ assert !lexer.peek(NexusFile::Tokens::EndBlk)
161
+ end
162
+
163
+ def test_semicolon
164
+ lexer = NexusFile::Lexer.new("; Matrix foo")
165
+ assert lexer.peek(NexusFile::Tokens::SemiColon)
166
+ assert foo = lexer.pop(NexusFile::Tokens::SemiColon)
167
+ end
168
+
169
+ def test_label
170
+ lexer = NexusFile::Lexer.new(' \'foo\' bar, blorf; "stuff things" stuff \'and foo\' 23434 ""asdf"" \'Foo_And_Stuff\' ')
171
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
172
+ assert_equal "foo", foo.value
173
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
174
+ assert_equal "bar", foo.value
175
+ assert lexer.pop(NexusFile::Tokens::Comma)
176
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
177
+ assert_equal "blorf", foo.value
178
+ assert lexer.pop(NexusFile::Tokens::SemiColon)
179
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
180
+ assert_equal "stuff things", foo.value
181
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
182
+ assert_equal "stuff", foo.value
183
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
184
+ assert_equal "and foo", foo.value
185
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
186
+ assert_equal "23434", foo.value
187
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
188
+ assert_equal '"asdf"', foo.value
189
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
190
+ assert_equal 'Foo_And_Stuff', foo.value
191
+ end
192
+
193
+ def test_odd_labels
194
+ lexer = NexusFile::Lexer.new("blorf 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' \"\"\" foo \"\"\" '''rupununi''' '''tanzania''' '''cup-shaped''' bar blorf\n;")
195
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
196
+ assert_equal "blorf", foo.value
197
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
198
+ assert_equal "fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)", foo.value
199
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
200
+ assert_equal '"" foo ""', foo.value
201
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
202
+ assert_equal "''rupununi''", foo.value
203
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
204
+ assert_equal "''tanzania''", foo.value
205
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
206
+ assert_equal "''cup-shaped''", foo.value
207
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
208
+ assert_equal "bar", foo.value
209
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
210
+ assert_equal "blorf", foo.value
211
+ assert foo = lexer.pop(NexusFile::Tokens::SemiColon)
212
+ end
213
+
214
+ def test_title
215
+ lexer = NexusFile::Lexer.new( "TITLE 'Scharff&Coddington_1997_Araneidae';")
216
+ assert foo = lexer.pop(NexusFile::Tokens::Title)
217
+ assert_equal "TITLE 'Scharff&Coddington_1997_Araneidae';", foo.value
218
+ end
219
+
220
+
221
+ def test_dimensions
222
+ input = " DIMENSIONS NCHAR= 10"
223
+ lexer = NexusFile::Lexer.new(input)
224
+ assert foo = lexer.pop(NexusFile::Tokens::Dimensions)
225
+ assert_equal "DIMENSIONS", foo.value
226
+ end
227
+
228
+ def test_format
229
+ input = " format NCHAR= 10"
230
+ lexer = NexusFile::Lexer.new(input)
231
+ assert foo = lexer.pop(NexusFile::Tokens::Format)
232
+ assert_equal "format", foo.value
233
+ end
234
+
235
+ def test_odd_value_pair
236
+ lexer = NexusFile::Lexer.new(" TEXT CHARACTER = 3 TEXT = A62.003;
237
+
238
+ TEXT CHARACTER = 4 TEXT = A62.004; \n end; ")
239
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
240
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
241
+ blorf = {:character => "3"}
242
+ assert_equal blorf , foo.value
243
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
244
+ blorf = {:text => "A62.003"}
245
+ assert_equal blorf , foo.value
246
+ assert foo = lexer.pop(NexusFile::Tokens::Label)
247
+ assert_equal "TEXT", foo.value
248
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
249
+ blorf = {:character => "4"}
250
+ assert_equal blorf , foo.value
251
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
252
+ blorf = {:text => "A62.004"}
253
+ assert_equal blorf , foo.value
254
+
255
+ end
256
+
257
+
258
+ def test_value_pair # ValuePair tokens: "key = value" becomes a one-entry hash with a lower-cased symbol key and a string value
259
+
260
+ lexer0 = NexusFile::Lexer.new(' DATATYPE=STANDARD ')
261
+ assert foo = lexer0.pop(NexusFile::Tokens::ValuePair)
262
+ blorf = {:datatype => "STANDARD"}
263
+ assert_equal blorf , foo.value
264
+
265
+ lexer = NexusFile::Lexer.new(' DATATYPE = STANDARD ')
266
+ assert foo = lexer.pop(NexusFile::Tokens::ValuePair)
267
+ blorf = {:datatype => "STANDARD"}
268
+ assert_equal blorf , foo.value
269
+
270
+ lexer2 = NexusFile::Lexer.new(' DATATYPE ="STANDARD" ')
271
+ assert foo = lexer2.pop(NexusFile::Tokens::ValuePair)
272
+ assert_equal blorf, foo.value # blorf still holds {:datatype => "STANDARD"} from above; quotes around the value are dropped
273
+
274
+ lexer3 = NexusFile::Lexer.new('DATATYPE= "STANDARD" ')
275
+ assert foo = lexer3.pop(NexusFile::Tokens::ValuePair)
276
+ assert_equal blorf, foo.value
277
+
278
+ input= " NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" ; "
279
+ lexer4 = NexusFile::Lexer.new(input) # four pairs in a row, popped one at a time
280
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
281
+ smorf = {:nchar => '10'}
282
+ assert_equal smorf, foo.value
283
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
284
+ smorf = {:ntaxa => '10'}
285
+ assert_equal smorf, foo.value
286
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
287
+ smorf = {:nfoo => '999'}
288
+ assert_equal smorf, foo.value
289
+ assert foo = lexer4.pop(NexusFile::Tokens::ValuePair)
290
+ smorf = {:nbar => 'a b c'} # whitespace just inside the quotes is not part of the expected value
291
+ assert_equal smorf, foo.value
292
+
293
+ lexer5 = NexusFile::Lexer.new(' symbols= " a c b d 1 " ')
294
+ assert foo = lexer5.pop(NexusFile::Tokens::ValuePair)
295
+ smorf = {:symbols => 'a c b d 1'}
296
+ assert_equal smorf, foo.value
297
+
298
+ lexer6 = NexusFile::Lexer.new(' missing = - ')
299
+ assert foo = lexer6.pop(NexusFile::Tokens::ValuePair)
300
+ smorf = {:missing => '-'}
301
+ assert_equal smorf, foo.value
302
+
303
+ lexer6a = NexusFile::Lexer.new("ntaxa=1;\n")
304
+ assert foo = lexer6a.pop(NexusFile::Tokens::ValuePair)
305
+ smorf = {:ntaxa => '1'} # the trailing semicolon is not part of the value
306
+ assert_equal smorf, foo.value
307
+
308
+ lexer7 = NexusFile::Lexer.new("ntaxa =1;\n")
309
+ assert foo = lexer7.pop(NexusFile::Tokens::ValuePair)
310
+ smorf = {:ntaxa => '1'}
311
+ assert_equal smorf, foo.value
312
+
313
+ lexer8 = NexusFile::Lexer.new(" ntaxa = 1 ;\n")
314
+ assert foo = lexer8.pop(NexusFile::Tokens::ValuePair)
315
+ smorf = {:ntaxa => '1'}
316
+ assert_equal smorf, foo.value
317
+
318
+ lexer9 = NexusFile::Lexer.new(" TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ")
319
+ assert foo = lexer9.pop(NexusFile::Tokens::ValuePair)
320
+ smorf = {:tf => "(CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!')" } # a parenthesised value is kept whole, parentheses included
321
+ assert_equal smorf, foo.value
322
+
323
+ lexer10 = NexusFile::Lexer.new(" TF = (CM 'This is an value pair that has (parens) within the value, twice! ()') ; some stuff left here ")
324
+ assert foo = lexer10.pop(NexusFile::Tokens::ValuePair)
325
+ smorf = {:tf => "(CM 'This is an value pair that has (parens) within the value, twice! ()')" }
326
+ assert_equal smorf, foo.value
327
+
328
+ lexer11 = NexusFile::Lexer.new("CHARACTER = 1 TEXT = A62.001;")
329
+ assert_equal true, !lexer11.peek(NexusFile::Tokens::SemiColon)
330
+ assert_equal true, lexer11.peek(NexusFile::Tokens::ValuePair) # peek is expected to return exactly true here
331
+ assert foo = lexer11.pop(NexusFile::Tokens::ValuePair)
332
+ smorf = {:character => "1" }
333
+ assert_equal smorf, foo.value
334
+ assert foo = lexer11.pop(NexusFile::Tokens::ValuePair) # second pair is popped but its value is not checked
335
+ end
336
+
337
+ def test_MesquiteIDs
338
+ lexer = NexusFile::Lexer.new('IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;
339
+ BLOCKID JC1191fcddc0c0;')
340
+ assert lexer.pop(NexusFile::Tokens::MesquiteIDs)
341
+ assert lexer.pop(NexusFile::Tokens::MesquiteBlockID)
342
+ end
343
+
344
+ def test_TreesBlk # one TreesBlk token covers the TREES block through its END;, leaving the next BEGIN to be popped separately
345
+ lexer = NexusFile::Lexer.new("BEGIN TREES;
346
+ Title Imported_trees;
347
+ LINK Taxa = 'Scharff&Coddington_1997_Araneidae';
348
+ TRANSLATE
349
+ 1 Dictyna,
350
+ 2 Uloborus,
351
+ 3 Deinopis,
352
+ 4 Nephila&Herennia,
353
+ 5 'Nephilengys_cruentata',
354
+ 6 Meta,
355
+ 7 Leucauge_venusta,
356
+ 8 Pachygnatha,
357
+ 9 'Theridiosoma_01',
358
+ 10 Tetragnatha;
359
+ TREE 'Imported tree 1+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
360
+ TREE 'Imported tree 2+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
361
+ TREE 'Imported tree 3+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
362
+ TREE 'Imported tree 4+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
363
+ TREE 'Imported tree 5+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
364
+ TREE 'Imported tree 6+' = (1,((2,3),(((4,5),(6,(7,(8,10)))),9)));
365
+ TREE 'Imported tree 7+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
366
+ TREE 'Imported tree 8+' = (1,((2,3),(((6,(4,5)),(7,(8,10))),9)));
367
+
368
+ END;
369
+
370
+
371
+ BEGIN LABELS;
372
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
373
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
374
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
375
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
376
+
377
+
378
+ END;")
379
+
380
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
381
+ assert foo = lexer.pop(NexusFile::Tokens::TreesBlk)
382
+ assert_equal 'TREES', foo.value.slice(0,5) # token value starts at the block keyword ...
383
+ assert_equal 'END;', foo.value.slice(-4,4) # ... and stops at an END; with no trailing whitespace
384
+ assert lexer.pop(NexusFile::Tokens::BeginBlk) # the LABELS block that follows is still available to the lexer
385
+ assert lexer.pop(NexusFile::Tokens::LabelsBlk)
386
+
387
+ end
388
+
389
+ def test_NotesBlk
390
+ input = "BEGIN NOTES ;"
391
+ lexer = NexusFile::Lexer.new(input)
392
+ assert lexer.pop(NexusFile::Tokens::BeginBlk)
393
+ assert foo = lexer.pop(NexusFile::Tokens::NotesBlk)
394
+ assert "NOTES", foo.value
395
+ end
396
+
397
+ def test_LabelsBlk
398
+ lexer = NexusFile::Lexer.new("
399
+ LABELS;
400
+ CHARGROUPLABEL MM_Genitalia COLOR = (RGB 1.0 0.4 0.4) ;
401
+ CHARGROUPLABEL Somatic COLOR = (RGB 0.6 1.0 0.33333333) ;
402
+ CHARGROUPLABEL Spinnerets COLOR = (RGB 0.46666667 0.57254902 1.0) ;
403
+ CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
404
+
405
+
406
+ END;
407
+
408
+ BEGIN some other block;")
409
+
410
+ assert foo = lexer.pop(NexusFile::Tokens::LabelsBlk)
411
+ assert_equal 'LABELS', foo.value.slice(0,6)
412
+ assert_equal 'END;', foo.value.slice(-4,4)
413
+ end
414
+
415
+ def test_SetsBlk
416
+ lexer = NexusFile::Lexer.new("
417
+ SETS;
418
+ CHARPARTITION * UNTITLED = Somatic : 1 - 2 4, MM_Genitalia : 5 - 8 10;
419
+
420
+ END;
421
+ BEGIN some other block;")
422
+
423
+ assert foo = lexer.pop(NexusFile::Tokens::SetsBlk)
424
+ assert_equal 'SETS', foo.value.slice(0,4)
425
+ assert_equal 'END;', foo.value.slice(-4,4)
426
+ end
427
+
428
+
429
+
430
+ def test_lexer_errors
431
+ lexer = NexusFile::Lexer.new("*&")
432
+ assert_raise(NexusFile::ParseError) {lexer.peek(NexusFile::Tokens::ID)}
433
+ end
434
+ end
435
+
436
+
437
+ class Test_Parser < Test::Unit::TestCase
438
+ def setup
439
+ # a Mesquite 2.n or higher file
440
+ @nf = File.read('MX_test_03.nex') # MX_test_01.nex
441
+ end
442
+
443
+ def teardown # clears the fixture text that setup stored in @nf
444
+ @nf = nil
445
+ end
446
+
447
+ def test_that_file_might_be_nexus
448
+ begin
449
+ assert !parse_nexus_file("#Nexblux Begin Natrix end;")
450
+ rescue NexusFile::ParseError
451
+ assert true
452
+ end
453
+ end
454
+
455
+ def test_parse_initializes
456
+ foo = parse_nexus_file(@nf)
457
+ end
458
+
459
+ def test_parse_file
460
+ # this is the major loop, all parts should exist
461
+ foo = parse_nexus_file(@nf)
462
+
463
+ assert_equal 10, foo.taxa.size
464
+ assert_equal 10, foo.characters.size
465
+ assert_equal 10, foo.codings.size
466
+ assert_equal 1, foo.taxa[1].notes.size # asserts that notes are parsing
467
+ assert_equal "norm", foo.characters[0].states["0"].name
468
+ assert_equal "modified", foo.characters[0].states["1"].name
469
+ end
470
+
471
+ def test_parse_authors_blk # placeholder: no assertions yet
472
+ end
473
+
474
+ def test_taxa_block
475
+ # we've popped off the header already
476
+ input =
477
+ "TITLE 'Scharff&Coddington_1997_Araneidae';
478
+ DIMENSIONS NTAX=10;
479
+ TAXLABELS
480
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
481
+ ;
482
+ IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
483
+ BLOCKID JC1191fcddc0c4;
484
+ END;"
485
+
486
+ builder = NexusFile::Builder.new
487
+ lexer = NexusFile::Lexer.new(input)
488
+ NexusFile::Parser.new(lexer,builder).parse_taxa_blk
489
+ foo = builder.nexus_file
490
+
491
+ assert_equal 10, foo.taxa.size
492
+ assert_equal "Dictyna", foo.taxa[0].name
493
+ assert_equal "Nephilengys_cruentata", foo.taxa[4].name
494
+ assert_equal "Theridiosoma_01", foo.taxa[8].name
495
+ assert_equal "Tetragnatha", foo.taxa[9].name
496
+ end
497
+
498
+ def test_taxa_block_without_IDS
499
+ # we've popped off the header already
500
+ input =
501
+ "TITLE 'Scharff&Coddington_1997_Araneidae';
502
+ DIMENSIONS NTAX=10;
503
+ TAXLABELS
504
+ Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
505
+ ;
506
+ END;"
507
+
508
+ builder = NexusFile::Builder.new
509
+ lexer = NexusFile::Lexer.new(input)
510
+ NexusFile::Parser.new(lexer,builder).parse_taxa_blk
511
+ foo = builder.nexus_file
512
+
513
+ assert_equal 10, foo.taxa.size
514
+ assert_equal "Dictyna", foo.taxa[0].name
515
+ assert_equal "Nephilengys_cruentata", foo.taxa[4].name
516
+ assert_equal "Theridiosoma_01", foo.taxa[8].name
517
+ assert_equal "Tetragnatha", foo.taxa[9].name
518
+ end
519
+
520
+
521
+
522
+ def test_parse_characters_blk # parse_characters_blk on a full CHARACTERS block body (title, dimensions, format, labels, matrix, IDS)
523
+ input= "
524
+ TITLE 'Scharff&Coddington_1997_Araneidae';
525
+ DIMENSIONS NCHAR=10;
526
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
527
+ CHARSTATELABELS
528
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
529
+ MATRIX
530
+ Dictyna 0?00201001
531
+ Uloborus 0?11000000
532
+ Deinopis 0?01002???
533
+ Nephila&Herennia 0?21010011
534
+ 'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11
535
+ Meta 0?01A10011
536
+ Leucauge_venusta ???--?-??-
537
+ Pachygnatha 0?210(0 1)0011
538
+ 'Theridiosoma_01' ??????????
539
+ Tetragnatha 0?01011011
540
+
541
+ ;
542
+ IDS JC1191fcddc3b425 JC1191fcddc3b426 JC1191fcddc3b427 JC1191fcddc3b428 JC1191fcddc3b429 JC1191fcddc3b430 JC1191fcddc3b431 JC1191fcddc3b432 JC1191fcddc3b433 JC1191fcddc3b434 ;
543
+ BLOCKID JC1191fcddc0c0;
544
+
545
+ END;"
546
+
547
+ builder = NexusFile::Builder.new
548
+ @lexer = NexusFile::Lexer.new(input)
549
+
550
+ # add the taxa, assumes we have them for comparison purposes, though we (shouldn't) ultimately need them
551
+ # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]
552
+
553
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
554
+ (0..9).each{|i| builder.stub_taxon} # ten calls, one per matrix row
555
+
556
+ NexusFile::Parser.new(@lexer,builder).parse_characters_blk
557
+ foo = builder.nexus_file
558
+
559
+ assert_equal 10, foo.characters.size
560
+ assert_equal "Tibia_II", foo.characters[0].name
561
+ assert_equal "TII_macrosetae", foo.characters[1].name
562
+
563
+ assert_equal "norm", foo.characters[0].states["0"].name
564
+ assert_equal "modified", foo.characters[0].states["1"].name
565
+
566
+
567
+ # ?!!?
568
+ # foo.characters[0].states["1"].name
569
+ assert_equal ["", "abs", "pres"], foo.characters[9].states.keys.collect{|s| foo.characters[9].states[s].name}.sort # character 10 is expected to carry one state with an empty name besides pres/abs
570
+
571
+
572
+ assert_equal ["0","1"], foo.codings[7][5].states # row 8 (Pachygnatha), cell 6 is the "(0 1)" group
573
+ assert_equal ["?"], foo.codings[9][1].states # row 10 (Tetragnatha), cell 2 is "?"
574
+ assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
575
+ end
576
+
577
+ def test_characters_block_without_IDs_or_title # same expectations as the full characters block test, with TITLE, IDS and BLOCKID left out of the input
578
+ input= "
579
+ DIMENSIONS NCHAR=10;
580
+ FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
581
+ CHARSTATELABELS
582
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs;
583
+ MATRIX
584
+ Dictyna 0?00201001
585
+ Uloborus 0?11000000
586
+ Deinopis 0?01002???
587
+ Nephila&Herennia 0?21010011
588
+ 'Nephilengys_cruentata'0?(0,1)1010(0,1,2)11
589
+ Meta 0?01A10011
590
+ Leucauge_venusta ???--?-??-
591
+ Pachygnatha 0?210(0 1)0011
592
+ 'Theridiosoma_01' ??????????
593
+ Tetragnatha 0?01011011
594
+
595
+ ;
596
+ END;"
597
+
598
+ builder = NexusFile::Builder.new
599
+ @lexer = NexusFile::Lexer.new(input)
600
+
601
+ # add the taxa, assumes we have them for comparison purposes, though we (shouldn't) ultimately need them
602
+ # foo.taxa = ["Dictyna", "Uloborus", "Deinopis", "Nephila&Herennia", "Nephilenygys_cruentata", "Meta", "Leucauge_venusta", "Pachygnatha", "Theridiosoma_01", "Tetragnatha"]
603
+
604
+ # stub the taxa, they would otherwise get added in dimensions or taxa block
605
+ (0..9).each{|i| builder.stub_taxon} # ten calls, one per matrix row
606
+
607
+ NexusFile::Parser.new(@lexer,builder).parse_characters_blk
608
+ foo = builder.nexus_file
609
+
610
+ assert_equal 10, foo.characters.size
611
+ assert_equal "Tibia_II", foo.characters[0].name
612
+ assert_equal "TII_macrosetae", foo.characters[1].name
613
+ assert_equal "norm", foo.characters[0].states["0"].name
614
+ assert_equal "modified", foo.characters[0].states["1"].name
615
+ assert_equal ["", "abs", "pres"], foo.characters[9].states.keys.collect{|s| foo.characters[9].states[s].name}.sort # sorted state names of character 10
616
+ assert_equal ["0","1"], foo.codings[7][5].states # row 8 (Pachygnatha), cell 6 is the "(0 1)" group
617
+ assert_equal ["?"], foo.codings[9][1].states # row 10 (Tetragnatha), cell 2 is "?"
618
+ assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
619
+ end
620
+
621
+ def test_characters_block_from_file
622
+ foo = parse_nexus_file(@nf)
623
+ assert 10, foo.characters.size
624
+ end
625
+
626
+ def test_codings
627
+ foo = parse_nexus_file(@nf)
628
+ assert 100, foo.codings.size # two multistates count in single cells
629
+ end
630
+
631
+ def test_parse_dimensions
632
+ input= " DIMENSIONS NCHAR=10 ntaxa =10 nfoo='999' nbar = \" a b c \" blorf=2; "
633
+ builder = NexusFile::Builder.new
634
+ lexer = NexusFile::Lexer.new(input)
635
+
636
+ NexusFile::Parser.new(lexer,builder).parse_dimensions
637
+ foo = builder.nexus_file
638
+
639
+ assert_equal "10", foo.vars[:nchar]
640
+ assert_equal "10", foo.vars[:ntaxa]
641
+ assert_equal "999", foo.vars[:nfoo]
642
+ assert_equal 'a b c', foo.vars[:nbar]
643
+ assert_equal '2', foo.vars[:blorf]
644
+ # add test that nothing is left in lexer
645
+ end
646
+
647
+ def test_parse_format
648
+ input = "FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
649
+ builder = NexusFile::Builder.new
650
+ lexer = NexusFile::Lexer.new(input)
651
+
652
+ NexusFile::Parser.new(lexer,builder).parse_format
653
+ foo = builder.nexus_file
654
+
655
+ assert_equal "STANDARD", foo.vars[:datatype]
656
+ assert_equal "-", foo.vars[:gap]
657
+ assert_equal "?", foo.vars[:missing]
658
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
659
+ # add test that nothing is left in lexer
660
+ end
661
+
662
+ def test_parse_chr_state_labels # parse_chr_state_labels assigns names and state names to characters by their 1-based label number
663
+ input =" CHARSTATELABELS
664
+ 1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
665
+ MATRIX
666
+ fooo 01 more stuff here that should not be hit"
667
+
668
+ builder = NexusFile::Builder.new
669
+ lexer = NexusFile::Lexer.new(input)
670
+
671
+ (0..9).each{builder.stub_chr()} # ten stub_chr calls before parsing; ten characters are expected afterwards
672
+
673
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
674
+
675
+ foo = builder.nexus_file
676
+ assert_equal 10, foo.characters.size
677
+ assert_equal "Tibia_II", foo.characters[0].name
678
+ assert_equal "norm", foo.characters[0].states["0"].name
679
+ assert_equal "modified", foo.characters[0].states["1"].name
680
+
681
+ assert_equal "TII_macrosetae", foo.characters[1].name
682
+ assert_equal "= TI", foo.characters[1].states["0"].name
683
+ assert_equal "stronger", foo.characters[1].states["1"].name
684
+
685
+ assert_equal "Femoral_tuber", foo.characters[2].name
686
+ assert_equal "abs", foo.characters[2].states["0"].name
687
+ assert_equal "pres", foo.characters[2].states["1"].name
688
+ assert_equal "m-setae", foo.characters[2].states["2"].name
689
+
690
+ assert_equal "Undefined", foo.characters[3].name # label 4 is absent from the input
691
+ assert_equal 0, foo.characters[3].states.keys.size
692
+
693
+ assert_equal "Cymbium", foo.characters[4].name
694
+ assert_equal "dorsal", foo.characters[4].states["0"].name
695
+ assert_equal "mesal", foo.characters[4].states["1"].name
696
+ assert_equal "lateral", foo.characters[4].states["2"].name
697
+
698
+ assert_equal "Paracymbium", foo.characters[5].name
699
+ assert_equal "abs", foo.characters[5].states["0"].name
700
+ assert_equal "pres", foo.characters[5].states["1"].name
701
+
702
+ assert_equal "Globular_tegulum", foo.characters[6].name
703
+ assert_equal "abs", foo.characters[6].states["0"].name
704
+ assert_equal "pres", foo.characters[6].states["1"].name
705
+
706
+ assert_equal "Undefined", foo.characters[7].name # label 8 lists states ("/ entire w_lobe") but no character name
707
+ assert_equal "entire", foo.characters[7].states["0"].name
708
+ assert_equal "w_lobe", foo.characters[7].states["1"].name
709
+
710
+ # ...
711
+
712
+ assert_equal "Median_apophysis", foo.characters[9].name
713
+ assert_equal "pres", foo.characters[9].states["0"].name
714
+ assert_equal "abs", foo.characters[9].states["1"].name
715
+ end
716
+
717
+ def test_strange_chr_state_labels
718
+ input =" CHARSTATELABELS
719
+ 29 'Metatarsal trichobothria (CodAra.29)' / 37623 '>2', 30 'Spinneret cuticle (CodAra.30)' / annulate ridged squamate;
720
+ Matrix
721
+ fooo 01 more stuff here that should not be hit"
722
+
723
+ builder = NexusFile::Builder.new
724
+ lexer = NexusFile::Lexer.new(input)
725
+
726
+ (0..29).each{builder.stub_chr()}
727
+
728
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
729
+
730
+ foo = builder.nexus_file
731
+
732
+ assert_equal "Metatarsal trichobothria (CodAra.29)", foo.characters[28].name
733
+ assert_equal "37623", foo.characters[28].states["0"].name
734
+ assert_equal ">2", foo.characters[28].states["1"].name
735
+
736
+ assert_equal "Spinneret cuticle (CodAra.30)", foo.characters[29].name
737
+ assert_equal "annulate", foo.characters[29].states["0"].name
738
+ assert_equal "ridged", foo.characters[29].states["1"].name
739
+ assert_equal "squamate", foo.characters[29].states["2"].name
740
+
741
+ end
742
+
743
+ def DONT_test_parse_really_long_string_of_chr_state_labels
744
+ input =" CHARSTATELABELS
745
+ 1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 
60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 
67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 
54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 
48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 
38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 
68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
746
+ MATRIX
747
+ fooo 01 more stuff here that should not be hit"
748
+
749
+ builder = NexusFile::Builder.new
750
+ lexer = NexusFile::Lexer.new(input)
751
+
752
+ (0..147).each{builder.stub_chr()}
753
+
754
+ NexusFile::Parser.new(lexer,builder).parse_chr_state_labels
755
+
756
+ foo = builder.nexus_file
757
+ assert_equal 10, foo.characters.size
758
+ assert_equal "Tibia_II", foo.characters[0].name
759
+ assert_equal "norm", foo.characters[0].states["0"].name
760
+ assert_equal "modified", foo.characters[0].states["1"].name
761
+
762
+ assert_equal "TII_macrosetae", foo.characters[1].name
763
+ assert_equal "= TI", foo.characters[1].states["0"].name
764
+ assert_equal "stronger", foo.characters[1].states["1"].name
765
+
766
+ assert_equal "Femoral_tuber", foo.characters[2].name
767
+ assert_equal "abs", foo.characters[2].states["0"].name
768
+ assert_equal "pres", foo.characters[2].states["1"].name
769
+ assert_equal "m-setae", foo.characters[2].states["2"].name
770
+
771
+ assert_equal "Undefined", foo.characters[3].name
772
+ assert_equal 0, foo.characters[3].states.keys.size
773
+
774
+ assert_equal "Cymbium", foo.characters[4].name
775
+ assert_equal "dorsal", foo.characters[4].states["0"].name
776
+ assert_equal "mesal", foo.characters[4].states["1"].name
777
+ assert_equal "lateral", foo.characters[4].states["2"].name
778
+
779
+ assert_equal "Paracymbium", foo.characters[5].name
780
+ assert_equal "abs", foo.characters[5].states["0"].name
781
+ assert_equal "pres", foo.characters[5].states["1"].name
782
+
783
+ assert_equal "Globular_tegulum", foo.characters[6].name
784
+ assert_equal "abs", foo.characters[6].states["0"].name
785
+ assert_equal "pres", foo.characters[6].states["1"].name
786
+
787
+ assert_equal "Undefined", foo.characters[7].name
788
+ assert_equal "entire", foo.characters[7].states["0"].name
789
+ assert_equal "w_lobe", foo.characters[7].states["1"].name
790
+
791
+ # ...
792
+
793
+ assert_equal "Median_apophysis", foo.characters[9].name
794
+ assert_equal "pres", foo.characters[9].states["0"].name
795
+ assert_equal "abs", foo.characters[9].states["1"].name
796
+ end
797
+
798
+
799
+
800
+ def test_parse_notes_blk # Runs Parser#parse_notes_blk over a NOTES-block body (TEXT footnotes and AN annotations) and asserts where each note ends up: on the file, a taxon, a character, or a coding cell.
801
+ input ="
802
+ TEXT TAXA = 'Scharff&Coddington_1997_Araneidae' TAXON = 2 TEXT = 'This is a footnote to taxon 2, Uloborus';
803
+
804
+ TEXT TAXON = 4 CHARACTER = 8 TEXT = This_is_a_footnote_to_a_cell.;
805
+
806
+ TEXT CHARACTER = 10 TEXT = This_is_footnote_to_char_10;
807
+
808
+ TEXT FILE TEXT = 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434';
809
+
810
+ AN T = 4 A = JC DC = 2008.4.13.20.31.19 DM = 2008.4.13.20.31.38 ID = 01194a57d0161 I = _ TF = (CM 'This is an \"annotation\" to taxon 4') ;
811
+
812
+ AN C = 4 A = JC DC = 2008.4.13.20.31.50 DM = 2008.4.13.20.32.10 ID = 01194a584b9f2 I = _ TF = (CM 'This is an annotation to charcter 4, that has no name.') ;
813
+
814
+ AN T = 9 C = 3 A = 0 DC = 2008.4.20.17.24.36 DM = 2008.4.20.17.25.4 ID = 01196db963874 I = _ TF = (CM 'This is an annotation to chr 3, taxa 9, coded ?') ;
815
+
816
+ AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is an annotation that haa a hard return in it^n^n^n^nSo there!') ;
817
+
818
+ AN T = 7 C = 10 A = 0 DC = 2008.4.20.17.25.11 DM = 2008.4.20.17.26.1 ID = 01196db9ebd25 I = _ TF = (CM 'this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -') ;
819
+
820
+ AN T = 2 C = 6 A = JC DC = 2008.4.13.20.35.20 DM = 2008.4.13.20.35.36 ID = JC1194a5b7e1a3 I = _ TF = (CM 'This is ANOTHER annotation that haa a hard return in it^n^n^n^nSo there!') ;
821
+
822
+ END; Don't parse this bit, eh?"
823
+ # The fixture's 1-based TAXON/T and CHARACTER/C numbers are asserted below at zero-based indices (e.g. TAXON = 4 CHARACTER = 8 -> codings[3][7]).
824
+ # Note: the second-to-last annotation embeds parentheses in its value.
825
+
826
+ builder = NexusFile::Builder.new
827
+ lexer = NexusFile::Lexer.new(input)
828
+
829
+ # Stubs: ten characters, ten taxa, and a fresh Coding in each cell the fixture annotates.
830
+ (0..9).each{builder.stub_chr()}
831
+ (0..9).each{builder.stub_taxon()}
832
+ builder.nexus_file.codings[3] = []
833
+ builder.nexus_file.codings[3][7] = NexusFile::NexusFile::Coding.new()
834
+ builder.nexus_file.codings[8] = []
835
+ builder.nexus_file.codings[8][2] = NexusFile::NexusFile::Coding.new()
836
+ builder.nexus_file.codings[1] = []
837
+ builder.nexus_file.codings[1][5] = NexusFile::NexusFile::Coding.new()
838
+ builder.nexus_file.codings[6] = []
839
+ builder.nexus_file.codings[6][9] = NexusFile::NexusFile::Coding.new()
840
+ builder.nexus_file.codings[3] = [] # NOTE(review): repeats the codings[3][7] stub from above; looks redundant, it only swaps in another fresh Coding before parsing
841
+ builder.nexus_file.codings[3][7] = NexusFile::NexusFile::Coding.new()
842
+
843
+ NexusFile::Parser.new(lexer,builder).parse_notes_blk
844
+
845
+ foo = builder.nexus_file
846
+
847
+ # make sure the stubs are set up
848
+ assert_equal 10, foo.characters.size
849
+ assert_equal 10, foo.taxa.size
850
+ # Footnotes from the TEXT entries: taxon 2, cell (taxon 4, character 8), character 10, and the file itself.
851
+ assert_equal 1, foo.taxa[1].notes.size
852
+ assert_equal 1, foo.codings[3][7].notes.size
853
+ assert_equal 'This_is_a_footnote_to_a_cell.', foo.codings[3][7].notes[0].note
854
+
855
+ assert_equal 1, foo.characters[9].notes.size
856
+ assert_equal 'This_is_footnote_to_char_10', foo.characters[9].notes[0].note
857
+
858
+ assert_equal 1, foo.notes.size
859
+ assert_equal 'Scharff, N. and J. A. Coddington. 1997. A phylogenetic analysis of the orb-weaving spider family Araneidae (Arachnida, Araneae). Zool. J. Linn. Soc. 120(4): 355?434', foo.notes[0].note
860
+ # Annotations from the AN entries.
861
+ assert_equal 1, foo.taxa[3].notes.size
862
+ assert_equal 1, foo.characters[3].notes.size
863
+ assert_equal 1, foo.codings[8][2].notes.size
864
+ assert_equal 1, foo.codings[6][9].notes.size
865
+ assert_equal 2, foo.codings[1][5].notes.size # two: the fixture has two AN entries for T = 2, C = 6
866
+ assert_equal 1, foo.codings[3][7].notes.size # same check as above: only the TEXT footnote targets this cell
867
+
868
+ # Note text is expected verbatim, including the ^n markers.
869
+ assert_equal "This_is_a_footnote_to_a_cell.", foo.codings[3][7].notes[0].note
870
+
871
+ assert_equal "This is an annotation to chr 3, taxa 9, coded ?", foo.codings[8][2].notes[0].note
872
+ assert_equal "This is an annotation that haa a hard return in it^n^n^n^nSo there!", foo.codings[1][5].notes[0].note
873
+ assert_equal "this is an annotation^nwith several hard returns^nfor a cell of taxa 6, chr 9 (from zero)^ncoded as -", foo.codings[6][9].notes[0].note
874
+ assert_equal "This is ANOTHER annotation that haa a hard return in it^n^n^n^nSo there!", foo.codings[1][5].notes[1].note
875
+
876
+ end
877
+
878
+ def test_notes_block_2 # Runs Parser#parse_notes_blk over eight unquoted TEXT CHARACTER footnotes and asserts each one lands on the matching character.
879
+ input="
880
+ TEXT CHARACTER = 1 TEXT = A62.001;
881
+ TEXT CHARACTER = 2 TEXT = A62.002;
882
+ TEXT CHARACTER = 3 TEXT = A62.003;
883
+ TEXT CHARACTER = 4 TEXT = A62.004;
884
+ TEXT CHARACTER = 5 TEXT = A62.005;
885
+ TEXT CHARACTER = 6 TEXT = A62.006;
886
+ TEXT CHARACTER = 7 TEXT = A62.007;
887
+ TEXT CHARACTER = 8 TEXT = A62.008;
888
+ end;
889
+ "
890
+
891
+ # Fixture: one unquoted TEXT footnote for each of characters 1-8; none of the values contains parentheses.
892
+
893
+ builder = NexusFile::Builder.new
894
+ lexer = NexusFile::Lexer.new(input)
895
+ # stubs: ten characters, two more than the fixture annotates
896
+ (0..9).each{builder.stub_chr()}
897
+
898
+ NexusFile::Parser.new(lexer,builder).parse_notes_blk
899
+
900
+ foo = builder.nexus_file
901
+
902
+ # make sure the stubs are set up
903
+ assert_equal 10, foo.characters.size
904
+ # CHARACTER = n is expected at characters[n - 1].
905
+ assert_equal 'A62.001', foo.characters[0].notes[0].note
906
+ assert_equal 'A62.002', foo.characters[1].notes[0].note
907
+ assert_equal 'A62.003', foo.characters[2].notes[0].note
908
+ assert_equal 'A62.004', foo.characters[3].notes[0].note
909
+ assert_equal 'A62.005', foo.characters[4].notes[0].note
910
+ assert_equal 'A62.006', foo.characters[5].notes[0].note
911
+ assert_equal 'A62.007', foo.characters[6].notes[0].note
912
+ assert_equal 'A62.008', foo.characters[7].notes[0].note
913
+ assert_equal NexusFile::NexusFile::Character, foo.characters[7].class # the stubbed entry is still a Character after parsing
914
+ assert_equal 1, foo.characters[7].notes.size # exactly one note on the last annotated character
915
+ end
916
+
917
+
918
+ def test_parse_trees_block # TODO: empty placeholder with no assertions yet (presumably reserved for a TREES block test).
919
+ end
920
+
921
+ def test_parse_labels_block # TODO: empty placeholder with no assertions yet (presumably reserved for a LABELS block test).
922
+ end
923
+
924
+ def test_parse_sets_block # TODO: empty placeholder with no assertions yet (presumably reserved for a SETS block test).
925
+ end
926
+
927
+ def test_parse_assumptions_block # TODO: empty placeholder with no assertions yet (presumably reserved for an ASSUMPTIONS block test).
928
+ end
929
+
930
+ def DONT_test_misc
931
+ nf = File.read('foo.nex') # MX_test_01.nex
932
+ foo = parse_nexus_file(nf)
933
+ assert true, foo
934
+ end
935
+
936
+ end
937
+