nexus_parser 1.2.1 → 1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/nexus_parser/lexer.rb +0 -10
- data/lib/nexus_parser/parser.rb +142 -75
- data/lib/nexus_parser/tokens.rb +70 -75
- data/lib/nexus_parser/version.rb +1 -1
- data/lib/nexus_parser.rb +38 -11
- data/test/test_nexus_parser.rb +347 -20
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4229e2b23de12e3ef92bc88a83aa04805d3884ca09019aaab843846f58fef964
|
4
|
+
data.tar.gz: 7973b5f04b84eea945ce632e5b20844a82c02a9a90c3c18a5ae4bbdaa97376c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd2739e8dcf4b84287f325a6443227b0b669a45f38c23a20f32bf30cfe88ac7eb34b5a6af6b0929c9af7b55c21b9096e683543528858527920dccfadff10d425
|
7
|
+
data.tar.gz: 40780dadb8ddc80554ca199e6ea9f0ffb5672db51c66c1b41390a25cf4f4c39e2b27799f644a293135b848b9feb3af0fc4cab338e5fe7f40ba670dcaad384965
|
data/lib/nexus_parser/lexer.rb
CHANGED
@@ -33,18 +33,8 @@ class NexusParser::Lexer
|
|
33
33
|
if @next_token
|
34
34
|
return @next_token
|
35
35
|
else
|
36
|
-
# check for a match on the specified class first
|
37
36
|
if match(token_class)
|
38
37
|
return @next_token
|
39
|
-
else
|
40
|
-
# now check all the tokens for a match
|
41
|
-
NexusParser::Tokens.nexus_file_token_list.each {|t|
|
42
|
-
return @next_token if match(t)
|
43
|
-
}
|
44
|
-
end
|
45
|
-
# no match, either end of string or lex-error
|
46
|
-
if @input != ''
|
47
|
-
raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
48
38
|
else
|
49
39
|
return nil
|
50
40
|
end
|
data/lib/nexus_parser/parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
|
2
2
|
class NexusParser::Parser
|
3
|
-
|
3
|
+
|
4
4
|
def initialize(lexer, builder)
|
5
5
|
@lexer = lexer
|
6
6
|
@builder = builder
|
@@ -10,41 +10,41 @@ class NexusParser::Parser
|
|
10
10
|
# nf = @builder.new_nexus_file # create new local NexusParser instance, nf
|
11
11
|
# blks = []
|
12
12
|
@lexer.pop(NexusParser::Tokens::NexusStart)
|
13
|
-
|
13
|
+
|
14
14
|
while @lexer.peek(NexusParser::Tokens::BeginBlk)
|
15
|
-
|
15
|
+
|
16
16
|
@lexer.pop(NexusParser::Tokens::BeginBlk) # pop it
|
17
|
-
|
17
|
+
|
18
18
|
if @lexer.peek(NexusParser::Tokens::AuthorsBlk)
|
19
19
|
parse_authors_blk
|
20
|
-
|
21
|
-
# we parse these below
|
20
|
+
|
21
|
+
# we parse these below
|
22
22
|
elsif @lexer.peek(NexusParser::Tokens::TaxaBlk)
|
23
|
-
|
23
|
+
|
24
24
|
@lexer.pop(NexusParser::Tokens::TaxaBlk )
|
25
25
|
parse_taxa_blk
|
26
|
-
|
26
|
+
|
27
27
|
elsif @lexer.peek(NexusParser::Tokens::ChrsBlk)
|
28
28
|
@lexer.pop(NexusParser::Tokens::ChrsBlk)
|
29
29
|
parse_characters_blk
|
30
30
|
|
31
31
|
elsif @lexer.peek(NexusParser::Tokens::NotesBlk)
|
32
|
-
@lexer.pop(NexusParser::Tokens::NotesBlk)
|
32
|
+
@lexer.pop(NexusParser::Tokens::NotesBlk)
|
33
33
|
parse_notes_blk
|
34
34
|
|
35
35
|
# we should parse this
|
36
36
|
elsif @lexer.peek(NexusParser::Tokens::SetsBlk)
|
37
37
|
@lexer.pop(NexusParser::Tokens::SetsBlk)
|
38
38
|
|
39
|
-
# we don't parse these
|
39
|
+
# we don't parse these
|
40
40
|
elsif @lexer.peek(NexusParser::Tokens::TreesBlk)
|
41
41
|
@foo = @lexer.pop(NexusParser::Tokens::TreesBlk).value
|
42
|
-
|
42
|
+
|
43
43
|
elsif @lexer.peek(NexusParser::Tokens::LabelsBlk)
|
44
44
|
@lexer.pop(NexusParser::Tokens::LabelsBlk)
|
45
|
-
|
45
|
+
|
46
46
|
elsif @lexer.peek(NexusParser::Tokens::MqCharModelsBlk)
|
47
|
-
@lexer.pop(NexusParser::Tokens::MqCharModelsBlk)
|
47
|
+
@lexer.pop(NexusParser::Tokens::MqCharModelsBlk)
|
48
48
|
|
49
49
|
elsif @lexer.peek(NexusParser::Tokens::AssumptionsBlk)
|
50
50
|
@lexer.pop(NexusParser::Tokens::AssumptionsBlk)
|
@@ -52,7 +52,7 @@ class NexusParser::Parser
|
|
52
52
|
elsif @lexer.peek(NexusParser::Tokens::CodonsBlk)
|
53
53
|
@lexer.pop(NexusParser::Tokens::CodonsBlk)
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
@@ -70,15 +70,15 @@ class NexusParser::Parser
|
|
70
70
|
|
71
71
|
# while @lexer.peek(NexusParser::Tokens::ValuePair)
|
72
72
|
# # IMPORTANT, these are going to a general hash, there may ultimately be overlap of keys used in different blocks, this is ignored at present
|
73
|
-
# @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
|
73
|
+
# @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
|
74
74
|
# end
|
75
|
-
|
75
|
+
|
76
76
|
#@lexer.pop(NexusParser::Tokens::ID) if @lexer.peek(NexusParser::Tokens::ID)
|
77
77
|
# end
|
78
78
|
#end
|
79
79
|
end
|
80
80
|
|
81
|
-
def parse_taxa_blk
|
81
|
+
def parse_taxa_blk
|
82
82
|
@lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title)
|
83
83
|
|
84
84
|
# need to not ignore to test against
|
@@ -88,7 +88,7 @@ class NexusParser::Parser
|
|
88
88
|
while true
|
89
89
|
inf += 1
|
90
90
|
raise(NexusParser::ParseError,"Either you have a gazillion taxa or more likely the parser is caught in an infinite loop trying to parser taxon labels. Check for double single quotes in this block.") if inf > 100000
|
91
|
-
|
91
|
+
|
92
92
|
if @lexer.peek(NexusParser::Tokens::EndBlk)
|
93
93
|
@lexer.pop(NexusParser::Tokens::EndBlk)
|
94
94
|
break
|
@@ -98,51 +98,53 @@ class NexusParser::Parser
|
|
98
98
|
@lexer.pop(NexusParser::Tokens::Taxlabels) if @lexer.peek(NexusParser::Tokens::Taxlabels)
|
99
99
|
i = 0
|
100
100
|
while @lexer.peek(NexusParser::Tokens::Label)
|
101
|
-
@builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
|
101
|
+
@builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
|
102
102
|
i += 1
|
103
|
-
end
|
103
|
+
end
|
104
104
|
@lexer.pop(NexusParser::Tokens::SemiColon) if @lexer.peek(NexusParser::Tokens::SemiColon) # close of tax labels, placement of this seems dubious... but tests are working
|
105
|
-
|
105
|
+
|
106
106
|
elsif @lexer.peek(NexusParser::Tokens::MesquiteIDs)
|
107
107
|
|
108
108
|
@lexer.pop(NexusParser::Tokens::MesquiteIDs) # trashing these for now
|
109
109
|
elsif @lexer.peek(NexusParser::Tokens::MesquiteBlockID)
|
110
|
-
@lexer.pop(NexusParser::Tokens::MesquiteBlockID)
|
110
|
+
@lexer.pop(NexusParser::Tokens::MesquiteBlockID)
|
111
111
|
end
|
112
|
-
|
112
|
+
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
116
116
|
|
117
117
|
end
|
118
118
|
|
119
|
-
def parse_characters_blk
|
120
|
-
|
121
|
-
inf = 0
|
119
|
+
def parse_characters_blk
|
120
|
+
|
121
|
+
inf = 0
|
122
122
|
while true
|
123
123
|
inf += 1
|
124
124
|
raise(NexusParser::ParseError,"Either you have a gazillion characters or more likely the parser is caught in an infinite loop trying to parser character data. Check for double single quotes in this block.") if inf > 100000
|
125
125
|
|
126
126
|
if @lexer.peek(NexusParser::Tokens::EndBlk) # we're at the end of the block, exit after geting rid of the semi-colon
|
127
|
-
break
|
127
|
+
break
|
128
128
|
else
|
129
129
|
@lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title) # not used at present
|
130
130
|
@lexer.pop(NexusParser::Tokens::LinkLine) if @lexer.peek(NexusParser::Tokens::LinkLine) # trashing these for now
|
131
|
-
|
131
|
+
|
132
132
|
parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)
|
133
|
-
parse_format if @lexer.peek(NexusParser::Tokens::Format)
|
134
|
-
|
133
|
+
parse_format if @lexer.peek(NexusParser::Tokens::Format)
|
134
|
+
|
135
135
|
parse_chr_state_labels if @lexer.peek(NexusParser::Tokens::CharStateLabels)
|
136
136
|
|
137
|
-
|
138
|
-
|
137
|
+
parse_chr_labels if @lexer.peek(NexusParser::Tokens::CharLabels)
|
138
|
+
|
139
|
+
parse_state_labels if @lexer.peek(NexusParser::Tokens::StateLabels)
|
140
|
+
|
141
|
+
parse_matrix if @lexer.peek(NexusParser::Tokens::Matrix)
|
142
|
+
|
139
143
|
# handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters block (sticks in an infinite loop right now)
|
140
144
|
|
141
145
|
|
142
146
|
@lexer.pop(NexusParser::Tokens::MesquiteIDs) if @lexer.peek(NexusParser::Tokens::MesquiteIDs) # trashing these for now
|
143
147
|
@lexer.pop(NexusParser::Tokens::MesquiteBlockID) if @lexer.peek(NexusParser::Tokens::MesquiteBlockID) # trashing these for now
|
144
|
-
|
145
|
-
false
|
146
148
|
end
|
147
149
|
end
|
148
150
|
@lexer.pop(NexusParser::Tokens::EndBlk)
|
@@ -150,7 +152,7 @@ class NexusParser::Parser
|
|
150
152
|
|
151
153
|
# prolly pop header then fuse with parse_dimensions
|
152
154
|
def parse_format
|
153
|
-
@lexer.pop(NexusParser::Tokens::Format)
|
155
|
+
@lexer.pop(NexusParser::Tokens::Format)
|
154
156
|
|
155
157
|
while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
|
156
158
|
@lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
|
@@ -160,13 +162,13 @@ class NexusParser::Parser
|
|
160
162
|
check_initialization_of_ntax_nchar
|
161
163
|
end
|
162
164
|
|
163
|
-
def parse_dimensions
|
165
|
+
def parse_dimensions
|
164
166
|
@lexer.pop(NexusParser::Tokens::Dimensions)
|
165
167
|
while @lexer.peek(NexusParser::Tokens::ValuePair)
|
166
168
|
@builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
|
167
169
|
end
|
168
170
|
# the last value pair with a ; is automagically handled, don't try popping it again
|
169
|
-
|
171
|
+
|
170
172
|
check_initialization_of_ntax_nchar
|
171
173
|
end
|
172
174
|
|
@@ -175,7 +177,7 @@ class NexusParser::Parser
|
|
175
177
|
if @builder.nexus_file.vars[:nchar] && @builder.nexus_file.characters == []
|
176
178
|
(0..(@builder.nexus_file.vars[:nchar].to_i - 1)).each {|i| @builder.stub_chr }
|
177
179
|
end
|
178
|
-
|
180
|
+
|
179
181
|
# check for taxa dimensions, if otherwise not set generate them
|
180
182
|
if @builder.nexus_file.vars[:ntax] && @builder.nexus_file.taxa == []
|
181
183
|
(0..(@builder.nexus_file.vars[:ntax].to_i - 1)).each {|i| @builder.stub_taxon }
|
@@ -184,45 +186,108 @@ class NexusParser::Parser
|
|
184
186
|
|
185
187
|
def parse_chr_state_labels
|
186
188
|
@lexer.pop(NexusParser::Tokens::CharStateLabels)
|
187
|
-
|
188
|
-
inf = 0
|
189
|
+
|
190
|
+
inf = 0
|
189
191
|
while true
|
190
192
|
inf += 1
|
191
193
|
raise(NexusParser::ParseError,"Either you have a gazillion character state labels or more likely the parser is caught in an infinite loop while trying to parser character state labels. Check for double single quotes in this block.") if inf > 100000
|
192
194
|
|
193
|
-
if @lexer.peek(NexusParser::Tokens::SemiColon)
|
194
|
-
break
|
195
|
+
if @lexer.peek(NexusParser::Tokens::SemiColon)
|
196
|
+
break
|
195
197
|
else
|
196
198
|
opts = {}
|
197
|
-
|
198
199
|
name = ""
|
199
|
-
|
200
|
-
|
200
|
+
|
201
|
+
index = @lexer.pop(NexusParser::Tokens::PositiveInteger).value.to_i
|
202
|
+
|
203
|
+
(name = @lexer.pop(NexusParser::Tokens::CharacterLabel).value) if @lexer.peek(NexusParser::Tokens::CharacterLabel) # not always given a letter
|
201
204
|
|
202
205
|
@lexer.pop(NexusParser::Tokens::BckSlash) if @lexer.peek(NexusParser::Tokens::BckSlash)
|
203
206
|
|
204
207
|
if !@lexer.peek(NexusParser::Tokens::Comma) || !@lexer.peek(NexusParser::Tokens::SemiColon)
|
205
208
|
i = 0
|
206
209
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
210
|
+
while @lexer.peek(NexusParser::Tokens::CharacterLabel)
|
211
|
+
opts.update({
|
212
|
+
i.to_s => @lexer.pop(NexusParser::Tokens::CharacterLabel).value
|
213
|
+
})
|
211
214
|
|
212
215
|
i += 1
|
213
|
-
end
|
216
|
+
end
|
214
217
|
end
|
215
218
|
|
216
219
|
@lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon
|
217
|
-
|
220
|
+
|
218
221
|
opts.update({:index => (index - 1), :name => name})
|
219
|
-
|
222
|
+
|
220
223
|
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
|
221
224
|
@builder.update_chr(opts)
|
222
|
-
end
|
225
|
+
end
|
223
226
|
|
224
227
|
end
|
225
|
-
@lexer.pop(NexusParser::Tokens::SemiColon)
|
228
|
+
@lexer.pop(NexusParser::Tokens::SemiColon)
|
229
|
+
end
|
230
|
+
|
231
|
+
def parse_chr_labels
|
232
|
+
@lexer.pop(NexusParser::Tokens::CharLabels)
|
233
|
+
|
234
|
+
inf = 0
|
235
|
+
while true
|
236
|
+
inf += 1
|
237
|
+
raise(NexusParser::ParseError,"Either you have a gazillion character labels or more likely the parser is caught in an infinite loop while trying to parse character labels. Check for double single quotes in this block.") if inf > 100000
|
238
|
+
|
239
|
+
if @lexer.peek(NexusParser::Tokens::SemiColon)
|
240
|
+
break
|
241
|
+
else
|
242
|
+
i = 0
|
243
|
+
while @lexer.peek(NexusParser::Tokens::CharacterLabel)
|
244
|
+
@builder.update_chr_name(
|
245
|
+
i, @lexer.pop(NexusParser::Tokens::CharacterLabel).value
|
246
|
+
)
|
247
|
+
|
248
|
+
i += 1
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
@lexer.pop(NexusParser::Tokens::SemiColon)
|
253
|
+
end
|
254
|
+
|
255
|
+
def parse_state_labels
|
256
|
+
@lexer.pop(NexusParser::Tokens::StateLabels)
|
257
|
+
|
258
|
+
inf = 0
|
259
|
+
while true
|
260
|
+
inf += 1
|
261
|
+
raise(NexusParser::ParseError,"Either you have a gazillion state labels or more likely the parser is caught in an infinite loop while trying to parse state labels. Check for double single quotes in this block.") if inf > 100000
|
262
|
+
|
263
|
+
if @lexer.peek(NexusParser::Tokens::SemiColon)
|
264
|
+
break
|
265
|
+
else
|
266
|
+
opts = {}
|
267
|
+
|
268
|
+
index = @lexer.pop(NexusParser::Tokens::PositiveInteger).value.to_i
|
269
|
+
|
270
|
+
if !@lexer.peek(NexusParser::Tokens::Comma) && !@lexer.peek(NexusParser::Tokens::SemiColon)
|
271
|
+
i = 0
|
272
|
+
|
273
|
+
while @lexer.peek(NexusParser::Tokens::CharacterLabel)
|
274
|
+
opts.update({
|
275
|
+
i.to_s => @lexer.pop(NexusParser::Tokens::CharacterLabel).value
|
276
|
+
})
|
277
|
+
|
278
|
+
i += 1
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
@lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon
|
283
|
+
|
284
|
+
opts.update({:index => (index - 1)})
|
285
|
+
|
286
|
+
@builder.update_chr_states(opts)
|
287
|
+
end
|
288
|
+
|
289
|
+
end
|
290
|
+
@lexer.pop(NexusParser::Tokens::SemiColon)
|
226
291
|
end
|
227
292
|
|
228
293
|
def parse_matrix
|
@@ -230,25 +295,25 @@ class NexusParser::Parser
|
|
230
295
|
i = 0
|
231
296
|
while true
|
232
297
|
if @lexer.peek(NexusParser::Tokens::SemiColon)
|
233
|
-
break
|
298
|
+
break
|
234
299
|
else
|
235
300
|
t = @lexer.pop(NexusParser::Tokens::Label).value
|
236
301
|
|
237
302
|
@builder.update_taxon(:index => i, :name => t) # if it exists its not re-added
|
238
303
|
|
239
304
|
@builder.code_row(i, @lexer.pop(NexusParser::Tokens::RowVec).value)
|
240
|
-
|
305
|
+
|
241
306
|
i += 1
|
242
307
|
end
|
243
308
|
end
|
244
|
-
@lexer.pop(NexusParser::Tokens::SemiColon) # pop the semicolon
|
309
|
+
@lexer.pop(NexusParser::Tokens::SemiColon) # pop the semicolon
|
245
310
|
end
|
246
311
|
|
247
312
|
# this suck(s/ed), it needs work when a better API for Mesquite comes out
|
248
313
|
def parse_notes_blk
|
249
314
|
# IMPORTANT - we don't parse the (CM <note>), we just strip the "(CM" ... ")" bit for now in NexusParser::Note
|
250
315
|
|
251
|
-
@vars = {}
|
316
|
+
@vars = {}
|
252
317
|
inf = 0 # a crude iteration checker
|
253
318
|
while true
|
254
319
|
inf += 1
|
@@ -261,18 +326,20 @@ class NexusParser::Parser
|
|
261
326
|
|
262
327
|
if @lexer.peek(NexusParser::Tokens::ValuePair)
|
263
328
|
@vars.update(@lexer.pop(NexusParser::Tokens::ValuePair).value)
|
264
|
-
|
265
|
-
elsif @lexer.peek(NexusParser::Tokens::
|
266
|
-
|
267
|
-
|
329
|
+
|
330
|
+
elsif @lexer.peek(NexusParser::Tokens::FileLbl)
|
331
|
+
@lexer.pop(NexusParser::Tokens::FileLbl)
|
332
|
+
@vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
|
333
|
+
|
334
|
+
else @lexer.peek(NexusParser::Tokens::Label)
|
335
|
+
# If we already have a :type set then the Label we just peeked starts a
|
336
|
+
# new row, so write the current one and then start a new one.
|
337
|
+
if @vars[:type]
|
268
338
|
@builder.add_note(@vars)
|
269
339
|
@vars = {}
|
270
|
-
else
|
271
|
-
@vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
|
272
340
|
end
|
273
|
-
|
274
|
-
@lexer.pop(NexusParser::Tokens::
|
275
|
-
@vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
|
341
|
+
|
342
|
+
@vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
|
276
343
|
end
|
277
344
|
end
|
278
345
|
end
|
@@ -280,9 +347,9 @@ class NexusParser::Parser
|
|
280
347
|
|
281
348
|
#@vars = {}
|
282
349
|
#while true
|
283
|
-
|
284
|
-
# break if @lexer.peek(NexusParser::Tokens::EndBlk)
|
285
|
-
|
350
|
+
|
351
|
+
# break if @lexer.peek(NexusParser::Tokens::EndBlk)
|
352
|
+
|
286
353
|
# @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
|
287
354
|
|
288
355
|
# kludge to get around the funny construct that references file
|
@@ -293,11 +360,11 @@ class NexusParser::Parser
|
|
293
360
|
|
294
361
|
# while true
|
295
362
|
|
296
|
-
# meh = @lexer.pop(NexusParser::Tokens::ValuePair)
|
363
|
+
# meh = @lexer.pop(NexusParser::Tokens::ValuePair)
|
297
364
|
# @vars.update(meh.value)
|
298
365
|
# break if !@lexer.peek(NexusParser::Tokens::ValuePair)
|
299
366
|
# end
|
300
|
-
#
|
367
|
+
#
|
301
368
|
# @builder.add_note(@vars)
|
302
369
|
# @vars = {}
|
303
370
|
#end
|
@@ -326,7 +393,7 @@ class NexusParser::Parser
|
|
326
393
|
# nor this
|
327
394
|
end
|
328
395
|
|
329
|
-
|
396
|
+
|
330
397
|
def parse_mesquite_blk
|
331
398
|
|
332
399
|
end
|
@@ -335,7 +402,7 @@ class NexusParser::Parser
|
|
335
402
|
|
336
403
|
# def parse_children(parent)
|
337
404
|
# parse a comma-separated list of nodes
|
338
|
-
# while true
|
405
|
+
# while true
|
339
406
|
# parse_node(parent)
|
340
407
|
# if @lexer.peek(NexusParser::Tokens::Comma)
|
341
408
|
# @lexer.pop(NexusParser::Tokens::Comma)
|
@@ -344,7 +411,7 @@ class NexusParser::Parser
|
|
344
411
|
# end
|
345
412
|
# end
|
346
413
|
# end
|
347
|
-
|
414
|
+
|
348
415
|
end
|
349
416
|
|
350
417
|
|
data/lib/nexus_parser/tokens.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module NexusParser::Tokens
|
2
2
|
|
3
3
|
ENDBLKSTR = '(end|endblock)'.freeze
|
4
|
+
QUOTEDLABEL = '(\'+[^\']+\'+)|(\"+[^\"]+\"+)'
|
4
5
|
|
5
6
|
class Token
|
6
7
|
# this allows access to the class attribute regexp, without using a class variable
|
@@ -78,9 +79,7 @@ module NexusParser::Tokens
|
|
78
79
|
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
79
80
|
end
|
80
81
|
|
81
|
-
|
82
|
-
class Label < Token
|
83
|
-
@regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" " # choking on 'Foo_stuff_things'
|
82
|
+
class LabelBase < Token
|
84
83
|
def initialize(str)
|
85
84
|
str.strip!
|
86
85
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
@@ -90,6 +89,20 @@ module NexusParser::Tokens
|
|
90
89
|
end
|
91
90
|
end
|
92
91
|
|
92
|
+
class Label < LabelBase
|
93
|
+
@regexp = Regexp.new(/\A\s*(#{QUOTEDLABEL}|(\w[^,:(); \t\n]*)+)\s*/) # matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "
|
94
|
+
def initialize(str)
|
95
|
+
super(str)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class CharacterLabel < LabelBase
|
100
|
+
@regexp = Regexp.new(/\A\s*(#{QUOTEDLABEL}|[^ \t\n\/\'\",;]+)\s*/)
|
101
|
+
def initialize(str)
|
102
|
+
super(str)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
93
106
|
class ChrsBlk < Token
|
94
107
|
@regexp = Regexp.new(/\A\s*(characters\s*;)\s*/i)
|
95
108
|
end
|
@@ -118,10 +131,50 @@ module NexusParser::Tokens
|
|
118
131
|
class RowVec < Token
|
119
132
|
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
120
133
|
def initialize(str)
|
121
|
-
#
|
122
|
-
|
123
|
-
|
124
|
-
|
134
|
+
# We ignore commas outside (and inside) of groupings, it's fine.
|
135
|
+
str.gsub!(/[\, \t]/, '')
|
136
|
+
|
137
|
+
groupers = ['(', ')', '{', '}']
|
138
|
+
openers = ['(', '{']
|
139
|
+
closers = [')', '}']
|
140
|
+
closer_for = { '(' => ')', '{' => '}' }
|
141
|
+
|
142
|
+
a = []
|
143
|
+
group = nil
|
144
|
+
group_closer = nil
|
145
|
+
str.each_char { |c|
|
146
|
+
if groupers.include? c
|
147
|
+
if ((openers.include?(c) && !group.nil?) ||
|
148
|
+
(closers.include?(c) && (group.nil? || c != group_closer)))
|
149
|
+
raise(NexusParser::ParseError,
|
150
|
+
"Mismatched grouping in matrix row '#{str}'")
|
151
|
+
end
|
152
|
+
|
153
|
+
if openers.include? c
|
154
|
+
group = []
|
155
|
+
group_closer = closer_for[c]
|
156
|
+
else # c is a closer
|
157
|
+
if group.count == 1
|
158
|
+
a << group.first
|
159
|
+
elsif group.count > 1
|
160
|
+
a << group
|
161
|
+
end
|
162
|
+
group = nil
|
163
|
+
group_closer = nil
|
164
|
+
end
|
165
|
+
else
|
166
|
+
if group.nil?
|
167
|
+
a << c
|
168
|
+
else
|
169
|
+
group << c
|
170
|
+
end
|
171
|
+
end
|
172
|
+
}
|
173
|
+
|
174
|
+
raise(NexusParser::ParseError,
|
175
|
+
"Unclosed grouping in matrix row '#{str}'") if !group.nil?
|
176
|
+
|
177
|
+
@value = a
|
125
178
|
end
|
126
179
|
end
|
127
180
|
|
@@ -129,6 +182,14 @@ module NexusParser::Tokens
|
|
129
182
|
@regexp = Regexp.new(/\A\s*(CHARSTATELABELS)\s*/i)
|
130
183
|
end
|
131
184
|
|
185
|
+
class CharLabels < Token
|
186
|
+
@regexp = Regexp.new(/\A\s*(CHARLABELS)\s*/i)
|
187
|
+
end
|
188
|
+
|
189
|
+
class StateLabels < Token
|
190
|
+
@regexp = Regexp.new(/\A\s*(STATELABELS)\s*/i)
|
191
|
+
end
|
192
|
+
|
132
193
|
class MesquiteIDs < Token
|
133
194
|
@regexp = Regexp.new(/\A\s*(IDS[^;]*;)\s*/i)
|
134
195
|
end
|
@@ -195,16 +256,6 @@ module NexusParser::Tokens
|
|
195
256
|
@regexp = Regexp.new('\A\s*(\/)\s*')
|
196
257
|
end
|
197
258
|
|
198
|
-
# labels
|
199
|
-
class ID < Token
|
200
|
-
@regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
|
201
|
-
def initialize(str)
|
202
|
-
str.strip!
|
203
|
-
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
204
|
-
@value = str
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
259
|
class Colon < Token
|
209
260
|
@regexp = Regexp.new('\A\s*(:)\s*')
|
210
261
|
end
|
@@ -217,66 +268,10 @@ module NexusParser::Tokens
|
|
217
268
|
@regexp = Regexp.new('\A\s*(\,)\s*')
|
218
269
|
end
|
219
270
|
|
220
|
-
class
|
221
|
-
@regexp = Regexp.new('\A\s*(
|
222
|
-
def initialize(str)
|
223
|
-
# a little oddness here, in some case we don't want to include the .0
|
224
|
-
# see issues with numbers as labels
|
225
|
-
if str =~ /\./
|
226
|
-
@value = str.to_f
|
227
|
-
else
|
228
|
-
@value = str.to_i
|
229
|
-
end
|
230
|
-
|
231
|
-
end
|
271
|
+
class PositiveInteger < Token
|
272
|
+
@regexp = Regexp.new('\A\s*(\d+)\s*')
|
232
273
|
end
|
233
274
|
|
234
275
|
# NexusParser::Tokens::NexusComment
|
235
276
|
|
236
|
-
# this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
|
237
|
-
def self.nexus_file_token_list
|
238
|
-
[ NexusParser::Tokens::NexusStart,
|
239
|
-
NexusParser::Tokens::BeginBlk,
|
240
|
-
NexusParser::Tokens::EndBlk,
|
241
|
-
NexusParser::Tokens::AuthorsBlk,
|
242
|
-
NexusParser::Tokens::SetsBlk,
|
243
|
-
NexusParser::Tokens::MqCharModelsBlk,
|
244
|
-
NexusParser::Tokens::AssumptionsBlk,
|
245
|
-
NexusParser::Tokens::CodonsBlk,
|
246
|
-
NexusParser::Tokens::MesquiteBlk,
|
247
|
-
NexusParser::Tokens::TreesBlk,
|
248
|
-
NexusParser::Tokens::LabelsBlk,
|
249
|
-
NexusParser::Tokens::TaxaBlk,
|
250
|
-
NexusParser::Tokens::NotesBlk,
|
251
|
-
NexusParser::Tokens::Title,
|
252
|
-
NexusParser::Tokens::Taxlabels,
|
253
|
-
NexusParser::Tokens::Dimensions,
|
254
|
-
NexusParser::Tokens::FileLbl,
|
255
|
-
NexusParser::Tokens::Format,
|
256
|
-
NexusParser::Tokens::RespectCase,
|
257
|
-
NexusParser::Tokens::Equals,
|
258
|
-
NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, its a lesser Label)
|
259
|
-
NexusParser::Tokens::CharStateLabels,
|
260
|
-
NexusParser::Tokens::ChrsBlk,
|
261
|
-
NexusParser::Tokens::Number,
|
262
|
-
NexusParser::Tokens::Matrix,
|
263
|
-
NexusParser::Tokens::SemiColon,
|
264
|
-
NexusParser::Tokens::MesquiteIDs,
|
265
|
-
NexusParser::Tokens::MesquiteBlockID,
|
266
|
-
NexusParser::Tokens::BlkEnd,
|
267
|
-
NexusParser::Tokens::Colon,
|
268
|
-
NexusParser::Tokens::BckSlash,
|
269
|
-
NexusParser::Tokens::Comma,
|
270
|
-
NexusParser::Tokens::LParen,
|
271
|
-
NexusParser::Tokens::RParen,
|
272
|
-
NexusParser::Tokens::LBracket,
|
273
|
-
NexusParser::Tokens::RBracket,
|
274
|
-
NexusParser::Tokens::Label, # must be before RowVec
|
275
|
-
NexusParser::Tokens::RowVec,
|
276
|
-
NexusParser::Tokens::LinkLine,
|
277
|
-
NexusParser::Tokens::ID # need to trash this
|
278
|
-
]
|
279
|
-
end
|
280
|
-
|
281
277
|
end
|
282
|
-
|
data/lib/nexus_parser/version.rb
CHANGED
data/lib/nexus_parser.rb
CHANGED
@@ -3,9 +3,6 @@
|
|
3
3
|
# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
|
4
4
|
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
|
5
5
|
|
6
|
-
# outstanding issues:
|
7
|
-
## need to resolve Tokens Labels, ValuePair, IDs
|
8
|
-
|
9
6
|
module NexusParser
|
10
7
|
|
11
8
|
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
@@ -118,7 +115,7 @@ class NexusParser
|
|
118
115
|
end
|
119
116
|
end
|
120
117
|
|
121
|
-
end
|
118
|
+
end # end NexusParser
|
122
119
|
|
123
120
|
|
124
121
|
# constructs the NexusParser
|
@@ -141,6 +138,9 @@ class Builder
|
|
141
138
|
def code_row(taxon_index, rowvector)
|
142
139
|
|
143
140
|
@nf.characters.each_with_index do |c, i|
|
141
|
+
raise(ParseError,
|
142
|
+
"Row #{taxon_index} of the matrix is too short") if rowvector[i].nil?
|
143
|
+
|
144
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
145
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
146
146
|
|
@@ -185,7 +185,7 @@ class Builder
|
|
185
185
|
|
186
186
|
# need to create the characters
|
187
187
|
|
188
|
-
raise(
|
188
|
+
raise(ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
189
189
|
|
190
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
191
191
|
|
@@ -193,18 +193,45 @@ class Builder
|
|
193
193
|
@opt.delete(:name)
|
194
194
|
|
195
195
|
# the rest have states
|
196
|
-
@opt
|
196
|
+
create_or_update_states_for_character(@index, @opt)
|
197
|
+
end
|
198
|
+
|
199
|
+
def update_chr_name(i, name)
|
200
|
+
raise(ParseError, "There are #{@nf.characters.count} characters but we're trying to update from row #{i + 1} of the CHARLABELS list - check your NCHAR and/or the length of your list.") if !@nf.characters[i]
|
197
201
|
|
198
|
-
|
202
|
+
# The CHARLABELS list is unindexed, so users are allowed to use '_' to
|
203
|
+
# indicate that a character name is unspecified.
|
204
|
+
@nf.characters[i].name = (name == '_' ? '' : name)
|
205
|
+
end
|
206
|
+
|
207
|
+
# legal hash keys are :index and integers that point to state labels
|
208
|
+
def update_chr_states(options = {})
|
209
|
+
return false if !options[:index]
|
210
|
+
|
211
|
+
@opt = options
|
212
|
+
|
213
|
+
@index = @opt[:index].to_i
|
214
|
+
|
215
|
+
raise(ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the STATELABELS. Check the indices.") if !@nf.characters[@index]
|
216
|
+
|
217
|
+
@opt.delete(:index)
|
218
|
+
|
219
|
+
# the rest have states
|
220
|
+
create_or_update_states_for_character(@index, @opt)
|
221
|
+
end
|
222
|
+
|
223
|
+
def create_or_update_states_for_character(i, options)
|
224
|
+
options.keys.each do |k|
|
225
|
+
|
226
|
+
if (@nf.characters[i].states != {}) && @nf.characters[i].states[k] # state exists
|
199
227
|
|
200
228
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
201
|
-
update_state(
|
229
|
+
update_state(i, :index => k, :name => options[k])
|
202
230
|
|
203
231
|
else # doesn't, create it
|
204
|
-
@nf.characters[
|
232
|
+
@nf.characters[i].add_state(:label => k.to_s, :name => options[k])
|
205
233
|
end
|
206
234
|
end
|
207
|
-
|
208
235
|
end
|
209
236
|
|
210
237
|
def update_state(chr_index, options = {})
|
@@ -256,7 +283,7 @@ class Builder
|
|
256
283
|
@nf
|
257
284
|
end
|
258
285
|
|
259
|
-
end # end
|
286
|
+
end # end Builder
|
260
287
|
|
261
288
|
# NexusParser::ParseError
|
262
289
|
class ParseError < StandardError
|
data/test/test_nexus_parser.rb
CHANGED
@@ -35,18 +35,18 @@ class Test_Lexer < Test::Unit::TestCase
|
|
35
35
|
def test_lexer
|
36
36
|
lexer = NexusParser::Lexer.new("[ foo ] BEGIN taxa; BLORF end;")
|
37
37
|
assert lexer.pop(NexusParser::Tokens::LBracket)
|
38
|
-
assert id = lexer.pop(NexusParser::Tokens::
|
38
|
+
assert id = lexer.pop(NexusParser::Tokens::Label)
|
39
39
|
assert_equal(id.value, "foo")
|
40
40
|
assert lexer.pop(NexusParser::Tokens::RBracket)
|
41
41
|
assert lexer.pop(NexusParser::Tokens::BeginBlk)
|
42
42
|
assert lexer.pop(NexusParser::Tokens::TaxaBlk)
|
43
|
-
assert foo = lexer.pop(NexusParser::Tokens::
|
43
|
+
assert foo = lexer.pop(NexusParser::Tokens::Label)
|
44
44
|
assert_equal("BLORF", foo.value) # truncating whitespace
|
45
45
|
assert lexer.pop(NexusParser::Tokens::BlkEnd)
|
46
46
|
|
47
47
|
lexer2 = NexusParser::Lexer.new("[ foo ] begin authors; BLORF end; [] () some crud here")
|
48
48
|
assert lexer2.pop(NexusParser::Tokens::LBracket)
|
49
|
-
assert id = lexer2.pop(NexusParser::Tokens::
|
49
|
+
assert id = lexer2.pop(NexusParser::Tokens::Label)
|
50
50
|
assert_equal(id.value, "foo")
|
51
51
|
assert lexer2.pop(NexusParser::Tokens::RBracket)
|
52
52
|
assert lexer2.pop(NexusParser::Tokens::BeginBlk)
|
@@ -64,44 +64,44 @@ class Test_Lexer < Test::Unit::TestCase
|
|
64
64
|
|
65
65
|
lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
|
66
66
|
assert lexer3.pop(NexusParser::Tokens::LBracket)
|
67
|
-
assert id = lexer3.pop(NexusParser::Tokens::
|
67
|
+
assert id = lexer3.pop(NexusParser::Tokens::Label)
|
68
68
|
assert_equal(id.value, "foo")
|
69
69
|
assert lexer3.pop(NexusParser::Tokens::RBracket)
|
70
70
|
assert lexer3.pop(NexusParser::Tokens::BeginBlk)
|
71
71
|
assert lexer3.pop(NexusParser::Tokens::ChrsBlk)
|
72
|
-
assert foo = lexer3.pop(NexusParser::Tokens::
|
72
|
+
assert foo = lexer3.pop(NexusParser::Tokens::Label)
|
73
73
|
assert_equal("BLORF", foo.value)
|
74
74
|
assert lexer3.pop(NexusParser::Tokens::BlkEnd)
|
75
75
|
|
76
76
|
lexer4 = NexusParser::Lexer.new("Begin Characters; 123123123 end; [] () some crud here")
|
77
77
|
assert lexer4.pop(NexusParser::Tokens::BeginBlk)
|
78
78
|
assert lexer4.pop(NexusParser::Tokens::ChrsBlk)
|
79
|
-
assert foo = lexer4.pop(NexusParser::Tokens::
|
80
|
-
assert_equal(123123123, foo.value)
|
79
|
+
assert foo = lexer4.pop(NexusParser::Tokens::PositiveInteger)
|
80
|
+
assert_equal('123123123', foo.value)
|
81
81
|
assert lexer4.pop(NexusParser::Tokens::BlkEnd)
|
82
82
|
|
83
83
|
lexer5 = NexusParser::Lexer.new("(0,1)")
|
84
84
|
assert lexer5.pop(NexusParser::Tokens::LParen)
|
85
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
86
|
-
assert_equal(0, foo.value)
|
85
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
86
|
+
assert_equal('0', foo.value)
|
87
87
|
assert lexer5.pop(NexusParser::Tokens::Comma)
|
88
|
-
assert foo = lexer5.pop(NexusParser::Tokens::
|
89
|
-
assert_equal(1, foo.value)
|
88
|
+
assert foo = lexer5.pop(NexusParser::Tokens::PositiveInteger)
|
89
|
+
assert_equal('1', foo.value)
|
90
90
|
assert lexer5.pop(NexusParser::Tokens::RParen)
|
91
91
|
|
92
92
|
lexer6 = NexusParser::Lexer.new(" 210(0,1)10A1\n")
|
93
93
|
assert foo = lexer6.pop(NexusParser::Tokens::RowVec)
|
94
94
|
assert_equal(["2","1","0",["0","1"],"1","0","A","1"], foo.value)
|
95
95
|
|
96
|
-
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{
|
96
|
+
lexer6a = NexusParser::Lexer.new(" 21a(0 1)0b{345}(0)(1 a)\n")
|
97
97
|
assert foo = lexer6a.pop(NexusParser::Tokens::RowVec)
|
98
98
|
assert_equal(["2", "1", "a", ["0", "1"], "0", "b", ["3", "4", "5"], "0", ["1", "a"]], foo.value)
|
99
99
|
|
100
|
-
lexer6b = NexusParser::Lexer.new(" 201{0 1}
|
100
|
+
lexer6b = NexusParser::Lexer.new(" 201(01){0 1}0100\x0A") # *nix line ending
|
101
101
|
assert foo = lexer6b.pop(NexusParser::Tokens::RowVec)
|
102
102
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
103
103
|
|
104
|
-
lexer6c = NexusParser::Lexer.new(" 201{0 1}{
|
104
|
+
lexer6c = NexusParser::Lexer.new(" 201{0 1}{01}0100\x0D\x0A") # * dos line ending
|
105
105
|
assert foo = lexer6c.pop(NexusParser::Tokens::RowVec)
|
106
106
|
assert_equal(["2", "0", "1", ["0", "1"], ["0", "1"], "0", "1", "0", "0"], foo.value)
|
107
107
|
|
@@ -126,7 +126,41 @@ class Test_Lexer < Test::Unit::TestCase
|
|
126
126
|
def test_row_vec
|
127
127
|
lexer = NexusParser::Lexer.new("0?(0 1)10(A BD , C)1(0,1,2)1-\n")
|
128
128
|
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
129
|
-
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "
|
129
|
+
assert_equal(["0", "?", ["0", "1"], "1", "0", ["A", "B", "D", "C"], "1", ["0", "1", "2"], "1", "-"], foo.value)
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_ungrouped_spaces_in_row_vec
|
133
|
+
lexer = NexusParser::Lexer.new("- A 12(BC) ? \n")
|
134
|
+
assert foo = lexer.pop(NexusParser::Tokens::RowVec)
|
135
|
+
assert_equal(['-', 'A', '1', '2', ['B', 'C'], '?'], foo.value)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_mismatched_parens_row_vec
|
139
|
+
lexer = NexusParser::Lexer.new("01(12(13\n")
|
140
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
141
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
142
|
+
}
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_mismatched_groupers_row_vec
|
146
|
+
lexer = NexusParser::Lexer.new("01(12}13\n")
|
147
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
148
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
149
|
+
}
|
150
|
+
end
|
151
|
+
|
152
|
+
def test_nested_parens_row_vec
|
153
|
+
lexer = NexusParser::Lexer.new("01(12(34))13\n")
|
154
|
+
assert_raise_with_message(NexusParser::ParseError, /Mismatch/) {
|
155
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
156
|
+
}
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_unclosed_parens_row_vec
|
160
|
+
lexer = NexusParser::Lexer.new("01(123413\n")
|
161
|
+
assert_raise_with_message(NexusParser::ParseError, /Unclosed/) {
|
162
|
+
lexer.pop(NexusParser::Tokens::RowVec)
|
163
|
+
}
|
130
164
|
end
|
131
165
|
|
132
166
|
def test_punctuation
|
@@ -428,11 +462,6 @@ class Test_Lexer < Test::Unit::TestCase
|
|
428
462
|
assert_equal 'SETS', foo.value.slice(0,4)
|
429
463
|
assert_equal 'END;', foo.value.slice(-4,4)
|
430
464
|
end
|
431
|
-
|
432
|
-
def test_lexer_errors
|
433
|
-
lexer = NexusParser::Lexer.new("*&")
|
434
|
-
assert_raise(NexusParser::ParseError) {lexer.peek(NexusParser::Tokens::ID)}
|
435
|
-
end
|
436
465
|
end
|
437
466
|
|
438
467
|
|
@@ -574,6 +603,30 @@ class Test_Parser < Test::Unit::TestCase
|
|
574
603
|
assert_equal ["-", "0", "1", "2", "A"], foo.characters[4].state_labels
|
575
604
|
end
|
576
605
|
|
606
|
+
def test_matrix_with_short_row
|
607
|
+
input= "
|
608
|
+
DIMENSIONS NCHAR=2;
|
609
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
610
|
+
CHARSTATELABELS
|
611
|
+
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger;
|
612
|
+
MATRIX
|
613
|
+
Dictyna 0?
|
614
|
+
Uloborus ??
|
615
|
+
Deinopis 0
|
616
|
+
;
|
617
|
+
END;"
|
618
|
+
|
619
|
+
builder = NexusParser::Builder.new
|
620
|
+
@lexer = NexusParser::Lexer.new(input)
|
621
|
+
|
622
|
+
# stub the taxa, they would otherwise get added in dimensions or taxa block
|
623
|
+
(0..2).each{|i| builder.stub_taxon}
|
624
|
+
|
625
|
+
assert_raise_with_message(NexusParser::ParseError, /too short/) {
|
626
|
+
NexusParser::Parser.new(@lexer, builder).parse_characters_blk
|
627
|
+
}
|
628
|
+
end
|
629
|
+
|
577
630
|
def test_characters_block_without_IDs_or_title
|
578
631
|
input= "
|
579
632
|
DIMENSIONS NCHAR=10;
|
@@ -623,6 +676,55 @@ class Test_Parser < Test::Unit::TestCase
|
|
623
676
|
assert_equal 10, foo.characters.size
|
624
677
|
end
|
625
678
|
|
679
|
+
def test_characters_charlabels_statelabels_block
|
680
|
+
input= "
|
681
|
+
DIMENSIONS NCHAR=4;
|
682
|
+
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";
|
683
|
+
CHARLABELS
|
684
|
+
Tibia_II
|
685
|
+
TII_macrosetae
|
686
|
+
'Femoral tuber'
|
687
|
+
_
|
688
|
+
;
|
689
|
+
STATELABELS
|
690
|
+
1 norm modified,
|
691
|
+
3 3 3.5 4,
|
692
|
+
4 pres
|
693
|
+
;
|
694
|
+
MATRIX
|
695
|
+
Dictyna -?1(01)
|
696
|
+
Uloborus 0321
|
697
|
+
;
|
698
|
+
ENDBLOCK;"
|
699
|
+
|
700
|
+
builder = NexusParser::Builder.new
|
701
|
+
lexer = NexusParser::Lexer.new(input)
|
702
|
+
|
703
|
+
(0..3).each{|i| builder.stub_taxon}
|
704
|
+
|
705
|
+
NexusParser::Parser.new(lexer,builder).parse_characters_blk
|
706
|
+
foo = builder.nexus_file
|
707
|
+
|
708
|
+
assert_equal 4, foo.characters.size
|
709
|
+
assert_equal "Femoral tuber", foo.characters[2].name
|
710
|
+
assert_equal "Undefined", foo.characters[3].name
|
711
|
+
|
712
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
713
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
714
|
+
|
715
|
+
assert_equal "", foo.characters[1].states["3"].name
|
716
|
+
|
717
|
+
assert_equal ["3", "3.5", "4"], foo.characters[2].states.keys.collect{|s| foo.characters[2].states[s].name}.sort
|
718
|
+
|
719
|
+
assert_equal "", foo.characters[1].states["3"].name
|
720
|
+
|
721
|
+
assert_equal ["-"], foo.codings[0][0].states
|
722
|
+
assert_equal ["?"], foo.codings[0][1].states
|
723
|
+
assert_equal ["0", "1"], foo.codings[0][3].states
|
724
|
+
|
725
|
+
assert_equal ["3"], foo.codings[1][1].states
|
726
|
+
end
|
727
|
+
|
626
728
|
def test_codings
|
627
729
|
foo = parse_nexus_file(@nf)
|
628
730
|
assert_equal 100, foo.codings.flatten.size # two multistates count in single cells
|
@@ -673,6 +775,68 @@ class Test_Parser < Test::Unit::TestCase
|
|
673
775
|
assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
|
674
776
|
end
|
675
777
|
|
778
|
+
# https://github.com/mjy/nexus_parser/issues/9
|
779
|
+
def test_three_both_numeric_and_label_state_names_in_a_row
|
780
|
+
input =" CHARSTATELABELS
|
781
|
+
1 'Metatarsal trichobothria (CodAra.29)' / 3 9 27 asdf;
|
782
|
+
Matrix
|
783
|
+
fooo 01 more stuff here that should not be hit"
|
784
|
+
|
785
|
+
builder = NexusParser::Builder.new
|
786
|
+
lexer = NexusParser::Lexer.new(input)
|
787
|
+
|
788
|
+
builder.stub_chr()
|
789
|
+
|
790
|
+
NexusParser::Parser.new(lexer, builder).parse_chr_state_labels
|
791
|
+
|
792
|
+
foo = builder.nexus_file
|
793
|
+
|
794
|
+
assert_equal "3", foo.characters[0].states['0'].name
|
795
|
+
assert_equal "9", foo.characters[0].states['1'].name
|
796
|
+
assert_equal "27", foo.characters[0].states['2'].name
|
797
|
+
assert_equal "asdf", foo.characters[0].states['3'].name
|
798
|
+
end
|
799
|
+
|
800
|
+
def test_non_label_character_name_character_labels
|
801
|
+
input = 'CHARSTATELABELS
|
802
|
+
1 (intentionally_blank) /,
|
803
|
+
2 /,
|
804
|
+
3 %_coverage /,
|
805
|
+
4 #_of_widgets /,
|
806
|
+
5 !endangered! /,
|
807
|
+
6 @the_front /,
|
808
|
+
7 =antennae,
|
809
|
+
8 `a_=_2` /,
|
810
|
+
9 -35_or-36 ,
|
811
|
+
10 27_or_less /,
|
812
|
+
11 fine_not_fine /,
|
813
|
+
12 3,
|
814
|
+
;'
|
815
|
+
|
816
|
+
builder = NexusParser::Builder.new
|
817
|
+
lexer = NexusParser::Lexer.new(input)
|
818
|
+
|
819
|
+
(0..11).each{builder.stub_chr()}
|
820
|
+
|
821
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
822
|
+
|
823
|
+
foo = builder.nexus_file
|
824
|
+
|
825
|
+
assert_equal 12, foo.characters.size
|
826
|
+
assert_equal "(intentionally_blank)", foo.characters[0].name
|
827
|
+
assert_equal "Undefined", foo.characters[1].name
|
828
|
+
assert_equal "%_coverage", foo.characters[2].name
|
829
|
+
assert_equal "#_of_widgets", foo.characters[3].name
|
830
|
+
assert_equal "!endangered!", foo.characters[4].name
|
831
|
+
assert_equal "@the_front", foo.characters[5].name
|
832
|
+
assert_equal "=antennae", foo.characters[6].name # =3
|
833
|
+
assert_equal "`a_=_2`", foo.characters[7].name
|
834
|
+
assert_equal "-35_or-36", foo.characters[8].name
|
835
|
+
assert_equal "27_or_less", foo.characters[9].name
|
836
|
+
assert_equal "fine_not_fine", foo.characters[10].name
|
837
|
+
assert_equal "3", foo.characters[11].name
|
838
|
+
end
|
839
|
+
|
676
840
|
def test_parse_chr_state_labels
|
677
841
|
input =" CHARSTATELABELS
|
678
842
|
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
@@ -754,6 +918,169 @@ class Test_Parser < Test::Unit::TestCase
|
|
754
918
|
|
755
919
|
end
|
756
920
|
|
921
|
+
def test_parse_chr_labels
|
922
|
+
input =" CHARLABELS
|
923
|
+
_
|
924
|
+
'Maxillary teeth'
|
925
|
+
as_df
|
926
|
+
'Highest number of maxillary teeth (or alveoli):';
|
927
|
+
STATELABELS
|
928
|
+
1 more more more,"
|
929
|
+
|
930
|
+
builder = NexusParser::Builder.new
|
931
|
+
lexer = NexusParser::Lexer.new(input)
|
932
|
+
|
933
|
+
(0..3).each{builder.stub_chr()}
|
934
|
+
|
935
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_labels
|
936
|
+
|
937
|
+
foo = builder.nexus_file
|
938
|
+
assert_equal 4, foo.characters.size
|
939
|
+
assert_equal 'Undefined', foo.characters[0].name
|
940
|
+
assert_equal 'Maxillary teeth', foo.characters[1].name
|
941
|
+
assert_equal 'as_df', foo.characters[2].name
|
942
|
+
assert_equal 'Highest number of maxillary teeth (or alveoli):', foo.characters[3].name
|
943
|
+
end
|
944
|
+
|
945
|
+
def test_parse_state_labels
|
946
|
+
input =" STATELABELS
|
947
|
+
1 norm modified,
|
948
|
+
3,
|
949
|
+
4 pres
|
950
|
+
;
|
951
|
+
CHARLABELS;
|
952
|
+
"
|
953
|
+
|
954
|
+
builder = NexusParser::Builder.new
|
955
|
+
lexer = NexusParser::Lexer.new(input)
|
956
|
+
|
957
|
+
(0..3).each{builder.stub_chr()}
|
958
|
+
|
959
|
+
NexusParser::Parser.new(lexer,builder).parse_state_labels
|
960
|
+
|
961
|
+
foo = builder.nexus_file
|
962
|
+
assert_equal 4, foo.characters.size
|
963
|
+
|
964
|
+
assert_equal "norm", foo.characters[0].states["0"].name
|
965
|
+
assert_equal "modified", foo.characters[0].states["1"].name
|
966
|
+
|
967
|
+
assert_empty foo.characters[1].states
|
968
|
+
|
969
|
+
assert_empty foo.characters[2].states
|
970
|
+
|
971
|
+
assert_equal "pres", foo.characters[3].states["0"].name
|
972
|
+
end
|
973
|
+
|
974
|
+
def test_non_label_character_state_character_labels
|
975
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
976
|
+
.5
|
977
|
+
.1.2_form
|
978
|
+
idsimple
|
979
|
+
%_of_length_less_than_10
|
980
|
+
!poisonous!
|
981
|
+
#_is_3_or_4
|
982
|
+
(leave_as_is)
|
983
|
+
@12_o_clock
|
984
|
+
>2
|
985
|
+
~equal
|
986
|
+
=9
|
987
|
+
;'
|
988
|
+
|
989
|
+
builder = NexusParser::Builder.new
|
990
|
+
lexer = NexusParser::Lexer.new(input)
|
991
|
+
|
992
|
+
builder.stub_chr()
|
993
|
+
|
994
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
995
|
+
|
996
|
+
foo = builder.nexus_file
|
997
|
+
|
998
|
+
assert_equal ".5", foo.characters[0].states["0"].name
|
999
|
+
assert_equal ".1.2_form", foo.characters[0].states["1"].name
|
1000
|
+
assert_equal "idsimple", foo.characters[0].states["2"].name
|
1001
|
+
assert_equal "%_of_length_less_than_10", foo.characters[0].states["3"].name
|
1002
|
+
assert_equal "!poisonous!", foo.characters[0].states["4"].name
|
1003
|
+
assert_equal "#_is_3_or_4", foo.characters[0].states["5"].name
|
1004
|
+
assert_equal "(leave_as_is)", foo.characters[0].states["6"].name
|
1005
|
+
assert_equal "@12_o_clock", foo.characters[0].states["7"].name
|
1006
|
+
assert_equal ">2", foo.characters[0].states["8"].name
|
1007
|
+
assert_equal "~equal", foo.characters[0].states["9"].name
|
1008
|
+
assert_equal "=9", foo.characters[0].states["10"].name
|
1009
|
+
end
|
1010
|
+
|
1011
|
+
def test_arbitrary_quote_and_quotelike_character_state_labels
|
1012
|
+
# We could tighten up our handling of accidentally unclosed quotes, but
|
1013
|
+
# there's pretty much no way to recover in general, so we're not testing
|
1014
|
+
# them here.
|
1015
|
+
# Things like ""asdf" " failing is a known issue (maybe not solvable with
|
1016
|
+
# regular expressions?).
|
1017
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1018
|
+
"asd, \'f\'"
|
1019
|
+
""a\'sdf "
|
1020
|
+
\' /as"df/\'
|
1021
|
+
\'asdf;\'
|
1022
|
+
""as, df""
|
1023
|
+
;'
|
1024
|
+
|
1025
|
+
builder = NexusParser::Builder.new
|
1026
|
+
lexer = NexusParser::Lexer.new(input)
|
1027
|
+
|
1028
|
+
builder.stub_chr()
|
1029
|
+
|
1030
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1031
|
+
|
1032
|
+
foo = builder.nexus_file
|
1033
|
+
|
1034
|
+
assert_equal 'asd, \'f\'', foo.characters[0].states["0"].name
|
1035
|
+
assert_equal '"a\'sdf', foo.characters[0].states["1"].name
|
1036
|
+
assert_equal '/as"df/', foo.characters[0].states["2"].name
|
1037
|
+
assert_equal 'asdf;', foo.characters[0].states["3"].name
|
1038
|
+
assert_equal '"as, df"', foo.characters[0].states["4"].name
|
1039
|
+
end
|
1040
|
+
|
1041
|
+
|
1042
|
+
def test_number_label_chr_state_labels
|
1043
|
+
# Character state names that start with numbers
|
1044
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1045
|
+
123abc
|
1046
|
+
-1.23abc
|
1047
|
+
-3e-3abc
|
1048
|
+
25%_or_less_than
|
1049
|
+
;'
|
1050
|
+
|
1051
|
+
builder = NexusParser::Builder.new
|
1052
|
+
lexer = NexusParser::Lexer.new(input)
|
1053
|
+
|
1054
|
+
(0..3).each{builder.stub_chr()}
|
1055
|
+
|
1056
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1057
|
+
|
1058
|
+
foo = builder.nexus_file
|
1059
|
+
|
1060
|
+
assert_equal "123abc", foo.characters[0].states["0"].name
|
1061
|
+
assert_equal "-1.23abc", foo.characters[0].states["1"].name
|
1062
|
+
assert_equal "-3e-3abc", foo.characters[0].states["2"].name
|
1063
|
+
assert_equal "25%_or_less_than", foo.characters[0].states["3"].name
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
def test_value_pair_label_chr_state_labels
|
1067
|
+
# Character state names that are ValuePairs
|
1068
|
+
input = 'CHARSTATELABELS 1 Tibia_II /
|
1069
|
+
234=(a_b_c)
|
1070
|
+
;'
|
1071
|
+
|
1072
|
+
builder = NexusParser::Builder.new
|
1073
|
+
lexer = NexusParser::Lexer.new(input)
|
1074
|
+
|
1075
|
+
builder.stub_chr()
|
1076
|
+
|
1077
|
+
NexusParser::Parser.new(lexer,builder).parse_chr_state_labels
|
1078
|
+
|
1079
|
+
foo = builder.nexus_file
|
1080
|
+
|
1081
|
+
assert_equal '234=(a_b_c)', foo.characters[0].states["0"].name
|
1082
|
+
end
|
1083
|
+
|
757
1084
|
def DONT_test_parse_really_long_string_of_chr_state_labels
|
758
1085
|
input =" CHARSTATELABELS
|
759
1086
|
1 Epigynal_ventral_margin / 'entire (Fig. 15G)' 'with scape (Fig. 27D)', 2 Epigynal_external_structure / openings_on_a_broad_depression 'copulatory openings on plate, flush with abdomen, sometimes slit like', 3 Epigynal_depression / 'round or square, at most slightly wider than high ' 'elongate, at least twice as wide as high ', 4 Epigynal_plate_surface / 'smooth (Fig. 12E)' 'ridged (Fig. 21G)', 5 epignynal_septum / absent_ present_, 6 Copulatory_bursa_anterior_margin / 'entire, broadly transverse (Fig. 19B)' 'medially acute (Figs. 22G, 40B)', 7 'Copulatory duct: spermathecal junction' / posterior lateral_or_anterior, 8 Copulatory_duct_loops_relative_to_spermathecae / apart 'encircling (Fig. 93J)', 9 Copulatory_duct_terminal_sclerotization / as_rest_of_duct_ 'distinctly sclerotized, clearly more than rest of duct ', 10 Hard_sclerotized_CD_region / mostly_or_entirely_ectal_to_the_ectal_rim_of_the_spermathecae 'caudal to the spermathecae, mesal to ectal margin of spermathecae', 11 Male_palpal_tibial_rim / uniform_or_only_slightly_asymmetric 'strongly and asymmetrically protruding, scoop-shaped (Fig 36D)', 12 Male_palpal_tibia_prolateral_trichobothria / one none, 13 Cymbial_ridge_ectal_setae / unmodified 'strongly curved towards the palpal bulb (Kochiura, Figs. 51B-C, 52C)', 14 Cymbial_distal_promargin / entire 'with an apophysis (Argyrodes, Figs.) ', 15 Cymbial_mesal_margin / entire 'incised (Anelosimus, Figs. 17D, 20A) ' deeply_notched, 16 Cymbial_tip_sclerotization / like_rest_of_cymbium 'lightly sclerotized, appears white', 17 Cymbial_tip_setae / like_other_setae 'thick and strongly curved (Kochiura, Figs. 51B, 52C)', 18 Cymbial_sheath / absent present, 19 Lock_placement / 'distal (Figs. 67B, 92F-G, I, M)' 'central (Fig. 92H)', 20 Lock_mechanism / 'hook (Figs 31F, 60D, 91A, 92D-E, J-L)' 'hood (Figs 18A, 75B, 92F-I, M)' 'Theridula (Fig 81D)', 21 Cymbial_hook_orientation / 'facing downwards (Figs. 91A, 92D-E, J-K)' 'facing upwards (Fig. 
60C-D, 92L)', 22 Cymbial_hook_location / 'inside cymbium (Fig. 92D-E, J-K)' 'ectal cymbial margin (Figs. 67B, 92L).', 23 Cymbial_hook_distal_portion / 'blunt (Figs. 31F, 92D-E)' 'tapering to a narrow tongue (Figs. 66B, 67D, 92L)', 24 Cymbial_hood_size / 'narrow (Fig. 92F-H)' 'broad (Fig. 92I)' 'Spintharus (Fig. 92M)', 25 Cymbial_hood_region / 'translucent, hood visible through cymbium (Anelosimus, Figs. 90A, 91C)' 'opaque, hood not visible', 26 Alveolus_shape / 'circular or oval (Fig. 92A-H)' 'with a mesal extension (Fig. 92A)', 27 Tegulum_ectal_margin / entire 'protruded (Fig. 20D)', 28 Tegular_groove / absent 'present (Fig. 28B)', 29 SDT_SB_I / separate touching, 30 'SDT post-SB II turn' / gradual '90 degrees (Anelosimus, Fig. 93B)', 31 SDT_SB_I_&_II_reservoir_segment_alignment / divergent parallel, 32 SDT_SB_I_&_II_orientation / in_plane_of_first_loop_from_fundus 'out of plane of first loop, against tegular wall', 33 SDT_RSB_I_&_II / absent present, 34 SDT_SB_III / absent present, 35 SDT_SB_IV / absent 'present (Fig. 93E)', 36 Conductor_shape / 'simple, round or oval, short' 'fan shaped, narrow base and broad tip (Selkirkiella, Kochiura)' Enoplognatha Argyrodes Achaearanea Theridion '''rupununi''' '''tanzania''' '''cup-shaped''', 37 Conductor / 'with a groove for embolus (Figs. 10A, 28D, 69B)' 'entire (Figs. 13D, 17F, 52C-D)', 38 Conductor_surface / 'smooth (Figs. 75B, 77B-C)' ' heavily ridged (Figs. 10B-C, 44D. 
67C, 69D)', 39 Conductor_tip_sclerotization / like_base more_than_base, 40 Subconductor / absent present, 41 Subconductor_pit_upper_wall / 'entire, or slightly protruding' forms_a_regular_oval_lip, 42 Subconductor_at_C_base / narrows_abruptly_before_C_base narrows_gradually_along_its_entire_length broad_at_base, 43 'Embolus tail-SC relation' / 'hooked in, or oriented towards SC' surpasses_SC behind_E_base, 44 Tegulum_ectally_ / occupying_less_than_half_of_the_cymbial_cavity_ occupying_more_than_half_of_the_cymbial_cavity, 45 MA_and_sperm_duct / sperm_duct_loop_not_inside_MA 'sperm duct loop inside MA (Figs. 90F, 91B)', 46 'MA-tegular membrane connection' / broad narrow, 47 MA_form / unbranched 'two nearly equally sized branches (Fig. 22A-B) ', 48 MA_distal_tip / entire hooded, 49 MA_hood_form / 'narrow, pit-like (Figs. 31F, 34D)' 'scoop-shaped (Figs. 60D, 66B, 67D)', 50 TTA_form / entire 'grooved (Fig. 44C)', 51 TTA / bulky 'prong shaped (vittatus group)', 52 TTA_distal_tip / entire_or_gently_curved Argyrodes 'hooked (branched)', 53 TTA_hook_distal_branch / barely_exceeding_lower_branch_ 'extending beyond lower branch (jucundus group) ', 54 TTA_hook_distal_branch / thick_ 'thin, finger like (domingo, dubiosus)', 55 TTA_hook_proximal_branch / 'blunt, broad' 'flattened, bladelike' 'cylindrical, elongated', 56 TTA_surface_subterminally / smooth ridged, 57 TTA_tip_surface / smooth 'ridged (Figs. 7A-B, 17F, 31D, 34D, 54A, 56B, 86A)', 58 Embolus_and_TTA / loosely_associated_to_or_resting_in_TTA_shallow_groove 'parts of E entirely enclosed in TTA (Figs. 37A-B, 44C, 89C)', 59 Embolus_tip_surface / smooth denticulate, 60 Embolus_spiral_curviture / gentle whip_like corkscrew, 61 Embolus_tip / entire bifid, 62 Embolus_origin / retroventral_on_tegulum 'retrolateral (ectal), partially or completely hidden by cymbium (Figs 44C, 60A-C, 67B)', 63 Embolus_ridges / absent present, 64 Embolus_shape / short_to_moderately_elongate 'extremely long, >2 spirals (Figs. 
54D, 73A-E)', 65 Embolus_spiral_width / 'thin, much of E spiral subequal to E tip ' 'thick, entire E spiral much broader than tip ', 66 Embolus_distal_rim / 'entire (normal)' deeply_grooved, 67 Embolic_terminus / abrupt 'with a distal apophysis (EA, Fig. 34E) ', 68 Embolus_tail / 'entire, smooth' 'distinct, lobed', 69 'Embolus-dh connection grooves' / absent present, 70 'Embolus-dh grooves' / 'deep, extend into the E base more than twice longer than the distance between them' 'short, extend into the E base about as long, or slightly longer than the distance between them', 71 E_spiral_distally / 'relatively thin or filiform, cylindrical' 'thick, not cylindrical' 'rupununi/lorenzo like', 72 Embolus_spiral / entire 'biparted (Eb)' pars_pendula, 73 Eb_orientation / towards_embolus_tip towards_tibia, 74 Embolic_division_b / separates_early_from_E E_and_Eb_tightly_associated_the_entire_spiral, 75 Embolic_division_b / broad 'narrow, relative to Eb spiral, snout-like', 76 'Eb distal portion, ectal marginl' / 'level, not raised ' with_a_distinct_ridge_, 77 Eb_form / flat 'globose, inflated', 78 Eb_form / 'distinct, clearly separate apophysis' 'short, confined to first section of spiral, barely separate', 79 Eb_tip_and_E_tip_association / separate Eb_and_E_tips_juxtaposed 'E tip rests on Eb ''cup''', 80 Eb_snout / 'short, snug with E spiral ' 'long, separate from E spiral ', 81 Distal_portion_of_Eb / entire with_a_cup_shaped_apophysis with_a_raised_ridge, 82 E_tail / lobe_not_reaching_ectal_margin_of_Eb_ lobe_touching_ectal_margin_of_Eb_, 83 Extra_tegular_sclerite / absent_ present_, 84 'Median eyes (male)' / flush_with_carapace 'on tubercle (Argyrodes)', 85 'AME size (male)' / subequal_or_slightly_larger_than_ALE clearly_smaller_than_ALE, 86 Cheliceral_posterior_margin / toothed smooth, 87 Cheliceral_posterior_tooth_number / three_or_more two one, 88 Cheliceral_furrow / smooth denticulate, 89 Carapace_hairiness / 'sparsely or patchily hirsute (Fig. 
48D)' 'uniformly hirsute (Fig. 71D)', 90 Carapace_pars_stridens / irregular regular_parallel_ridges, 91 Interocular_area / more_or_less_flush_with_clypeus projecting_beyond_clypeus, 92 Clypeus / concave_or_flat with_a_prominent_projection, 93 'ocular and clypeal region setae distribution (male)' / sparse 'in a dense field, or fields', 94 'Labium-sternum connection' / 'visible seam (Fig. 27C)' fused, 95 Sternocoxal_tubercles / present absent, 96 Pedicel_location / 'anterior (Fig. 94A-D)' 'medial (Fig. 94J-K)', 97 Abdominal_folium_pattern / bilateral_spots_or_blotches distinct_central_band_, 98 Abdomen_pattern / Anelosimus_, 99 Dorsal_band / 'dark edged by white (Kochiura, Anelosimus, Fig. 94G, J)' 'light edged by dark (Fig. 94H)' 'Ameridion, light edged by white (Fig. 94I)', 100 Abdominal_dot_pigment / silver 'non-reflective, dull', 101 SPR_form / 'weakly keeled (Figs. 67F, 74F)' 'strongly keeled and elongate (Figs. 16B-C, 24D-E, 42F)', 102 SPR_pick_number / '1-4' '6-28' '>30', 103 SPR_insertion / flush_with_abdominal_surface 'on a ridge (Figs 32D, 72A-B)', 104 'SPR mesally-oriented picks' / absent present, 105 'SPR mesally-oriented picks relative to sagittal plane' / angled_dorsally perpendicular_or_angled_ventrally, 106 SPR / straight_or_slightly_irregular distinctly_curved 'argyrodine, dorsal picks aside others', 107 SPR_dorsal_pick_spacing / subequal_to_ventral_pick_spacing distinctly_compressed, 108 SPR_relative_to_pedicel / lateral dorsal, 109 SPR_setae / separate tight, 110 'Supra pedicillate ventrolateral (4 o''clock) proprioreceptor' / absent present, 111 Epiandrous_fusule_arrangement / in_one_pair_of_sockets in_a_row, 112 Epiandrous_fusule_pair_number / '=>9' '6-8' '4-5' 1, 113 Colulus / 'present (Figs. 45E, 61F)' 'absent (Figs. 16E, 78A)' 'invaginated (Figs. 9D, 63G)', 114 Colulus_size / 'large and fleshy (Figs. 55H, 61F)' 'small, less than half the length of its setae (Fig. 
38B)', 115 Colular_setae / present absent, 116 'Colular setae number (female)' / three_or_more two_, 117 'Palpal claw dentition (female)' / 'dense, > half of surface covered by denticles (Figs. 2D, 9E, 11D, 12G, 45G, 47E, 58G, 80D)' 'sparse < half of surface with denticles', 118 'Palpal tibial trichobothria (female)' / four three two five, 119 Femur_I_relative_to_II / subequal 'robust, clearly larger than femur II', 120 'Leg IV relative length (male)' / '3rd longest (typical leg formula 1243)' '2nd longest (typical leg formula 1423)' 'longest (typical leg formula 4123)', 121 'Leg IV relative length (female)' / 3rd_longest 2nd_longest longest_, 122 'Femur vs. metatarsus length (female)' / metatarsus_longer metatarsus_shorter, 123 'Femur vs. metatarsus length (male)' / metatarsus_longer metatarsus_shorter, 124 'Metatarsus vs. tibia length (female)' / metatarsus_longer metatarsus_shorter, 125 'Metatarsus vs. tibia length (male)' / metatarsus_longer metatarsus_shorter, 126 Metatarsal_ventral_macrosetae / like_other_macrosetae thickened_ventrally, 127 Tarsus_IV_comb_serrations / 'simple, straight' curved_hooks, 128 Tarsal_organ_size / 'smaller than setal sockets (normal)' enlarged, 129 'Tarsus IV central claw vs. laterals (male)' / 'short, at most subequal' 'elongate, longer (Figs. 19E, 21C, 23D, 32H, 57F, 58F)', 130 'Tarsus IV central claw vs. laterals (female)' / equal_or_shorter stout_and_distinctly_longer minute, 131 Spinneret_insertion / abdominal_apex 'subapical, abdomen extending beyond spinnerets', 132 PLS_flagelliform_spigot_length / subequal_to__PLS_CY 'longer than PLS CY (Figs. 
68E, 78B, 82D)', 133 'PLS, PMS CY spigot bases' / 'not modified, subequal or smaller than ampullates' 'huge and elongated, much larger than ampullates ', 134 CY_shaft_surface / smooth grooved, 135 PLS_AC_spigot_number / five_or_more four_or_less, 136 PLS_flagelliform_spigot / present absent, 137 PLS_posterior_AG_spigot_shape / 'normal, round' flattened, 138 PLS_theridiid_type_AG_position / more_or_less_parallel end_to_end, 139 'PMS minor ampullate (mAP) spigot shaft length' / 'short, subequal to CY shaft' clearly_longer_than_any_CY_shaft, 140 Web_form / 'linyphioid-like sheet web (Fig. 99C)' 'cobweb (Figs. 97G, 99A-B, 100A-F, 101A-E)' 'network mesh web - with foraging field below (rupununi/lorenzo)' 'dry line-web', 141 'Knock-down lines' / absent present, 142 Sticky_silk_in_web / present absent, 143 Egg_sac_surface / spherical_to_lenticular 'stalked (Fig. 88E, 98D).', 144 Egg_case_structure / suboval_or_roundish basal_knob rhomboid elongated Spiky, 145 Web_construction / solitary communal, 146 Mating_thread / present absent, 147 Adult_females_per_nest / one multiple, 148 cooperative_behavior / solitary subsocial permanent_sociality ;
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nexus_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mjy
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-05-
|
12
|
+
date: 2024-05-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|