nexus_parser 1.2.0 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93e9b7ced7b53e19e2cd33c72c433736058c4adcf7555dc6b43635c08f5dcea7
4
- data.tar.gz: 4b9bce06037be960e29e1e4f02bbeef286f47a35e977384a5d451e1f0c3e3f91
3
+ metadata.gz: 4229e2b23de12e3ef92bc88a83aa04805d3884ca09019aaab843846f58fef964
4
+ data.tar.gz: 7973b5f04b84eea945ce632e5b20844a82c02a9a90c3c18a5ae4bbdaa97376c8
5
5
  SHA512:
6
- metadata.gz: 2297128f4ad470e7de1760996aaa712ed8bc691fee0a3cc8a59de54cf1b40bcc9d1cd6478049ae14f93b90ea208a93b6bc4872b76ecf8fc99221ea1b583f954d
7
- data.tar.gz: 19615c9fcdc2469bf32e681ac3b81b1a4ca8f468a0154671573b73c0e54f247d78b075ee3a2a1d63a4ba21c0cd657dfe656f8140b8feed07ca54d93fadf1772a
6
+ metadata.gz: cd2739e8dcf4b84287f325a6443227b0b669a45f38c23a20f32bf30cfe88ac7eb34b5a6af6b0929c9af7b55c21b9096e683543528858527920dccfadff10d425
7
+ data.tar.gz: 40780dadb8ddc80554ca199e6ea9f0ffb5672db51c66c1b41390a25cf4f4c39e2b27799f644a293135b848b9feb3af0fc4cab338e5fe7f40ba670dcaad384965
data/.gitignore CHANGED
@@ -13,6 +13,9 @@ tmtags
13
13
  ## VIM
14
14
  *.swp
15
15
 
16
+ ## BYEBUG
17
+ .byebug_history
18
+
16
19
  ## PROJECT::GENERAL
17
20
  coverage
18
21
  rdoc
@@ -33,18 +33,8 @@ class NexusParser::Lexer
33
33
  if @next_token
34
34
  return @next_token
35
35
  else
36
- # check for a match on the specified class first
37
36
  if match(token_class)
38
37
  return @next_token
39
- else
40
- # now check all the tokens for a match
41
- NexusParser::Tokens.nexus_file_token_list.each {|t|
42
- return @next_token if match(t)
43
- }
44
- end
45
- # no match, either end of string or lex-error
46
- if @input != ''
47
- raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
48
38
  else
49
39
  return nil
50
40
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  class NexusParser::Parser
3
-
3
+
4
4
  def initialize(lexer, builder)
5
5
  @lexer = lexer
6
6
  @builder = builder
@@ -10,41 +10,41 @@ class NexusParser::Parser
10
10
  # nf = @builder.new_nexus_file # create new local NexusParser instance, nf
11
11
  # blks = []
12
12
  @lexer.pop(NexusParser::Tokens::NexusStart)
13
-
13
+
14
14
  while @lexer.peek(NexusParser::Tokens::BeginBlk)
15
-
15
+
16
16
  @lexer.pop(NexusParser::Tokens::BeginBlk) # pop it
17
-
17
+
18
18
  if @lexer.peek(NexusParser::Tokens::AuthorsBlk)
19
19
  parse_authors_blk
20
-
21
- # we parse these below
20
+
21
+ # we parse these below
22
22
  elsif @lexer.peek(NexusParser::Tokens::TaxaBlk)
23
-
23
+
24
24
  @lexer.pop(NexusParser::Tokens::TaxaBlk )
25
25
  parse_taxa_blk
26
-
26
+
27
27
  elsif @lexer.peek(NexusParser::Tokens::ChrsBlk)
28
28
  @lexer.pop(NexusParser::Tokens::ChrsBlk)
29
29
  parse_characters_blk
30
30
 
31
31
  elsif @lexer.peek(NexusParser::Tokens::NotesBlk)
32
- @lexer.pop(NexusParser::Tokens::NotesBlk)
32
+ @lexer.pop(NexusParser::Tokens::NotesBlk)
33
33
  parse_notes_blk
34
34
 
35
35
  # we should parse this
36
36
  elsif @lexer.peek(NexusParser::Tokens::SetsBlk)
37
37
  @lexer.pop(NexusParser::Tokens::SetsBlk)
38
38
 
39
- # we don't parse these
39
+ # we don't parse these
40
40
  elsif @lexer.peek(NexusParser::Tokens::TreesBlk)
41
41
  @foo = @lexer.pop(NexusParser::Tokens::TreesBlk).value
42
-
42
+
43
43
  elsif @lexer.peek(NexusParser::Tokens::LabelsBlk)
44
44
  @lexer.pop(NexusParser::Tokens::LabelsBlk)
45
-
45
+
46
46
  elsif @lexer.peek(NexusParser::Tokens::MqCharModelsBlk)
47
- @lexer.pop(NexusParser::Tokens::MqCharModelsBlk)
47
+ @lexer.pop(NexusParser::Tokens::MqCharModelsBlk)
48
48
 
49
49
  elsif @lexer.peek(NexusParser::Tokens::AssumptionsBlk)
50
50
  @lexer.pop(NexusParser::Tokens::AssumptionsBlk)
@@ -52,7 +52,7 @@ class NexusParser::Parser
52
52
  elsif @lexer.peek(NexusParser::Tokens::CodonsBlk)
53
53
  @lexer.pop(NexusParser::Tokens::CodonsBlk)
54
54
  end
55
-
55
+
56
56
  end
57
57
  end
58
58
 
@@ -70,15 +70,15 @@ class NexusParser::Parser
70
70
 
71
71
  # while @lexer.peek(NexusParser::Tokens::ValuePair)
72
72
  # # IMPORTANT, these are going to a general hash, there may ultimately be overlap of keys used in different blocks, this is ignored at present
73
- # @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
73
+ # @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
74
74
  # end
75
-
75
+
76
76
  #@lexer.pop(NexusParser::Tokens::ID) if @lexer.peek(NexusParser::Tokens::ID)
77
77
  # end
78
78
  #end
79
79
  end
80
80
 
81
- def parse_taxa_blk
81
+ def parse_taxa_blk
82
82
  @lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title)
83
83
 
84
84
  # need to not ignore to test against
@@ -88,7 +88,7 @@ class NexusParser::Parser
88
88
  while true
89
89
  inf += 1
90
90
  raise(NexusParser::ParseError,"Either you have a gazillion taxa or more likely the parser is caught in an infinite loop trying to parser taxon labels. Check for double single quotes in this block.") if inf > 100000
91
-
91
+
92
92
  if @lexer.peek(NexusParser::Tokens::EndBlk)
93
93
  @lexer.pop(NexusParser::Tokens::EndBlk)
94
94
  break
@@ -98,51 +98,53 @@ class NexusParser::Parser
98
98
  @lexer.pop(NexusParser::Tokens::Taxlabels) if @lexer.peek(NexusParser::Tokens::Taxlabels)
99
99
  i = 0
100
100
  while @lexer.peek(NexusParser::Tokens::Label)
101
- @builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
101
+ @builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
102
102
  i += 1
103
- end
103
+ end
104
104
  @lexer.pop(NexusParser::Tokens::SemiColon) if @lexer.peek(NexusParser::Tokens::SemiColon) # close of tax labels, placement of this seems dubious... but tests are working
105
-
105
+
106
106
  elsif @lexer.peek(NexusParser::Tokens::MesquiteIDs)
107
107
 
108
108
  @lexer.pop(NexusParser::Tokens::MesquiteIDs) # trashing these for now
109
109
  elsif @lexer.peek(NexusParser::Tokens::MesquiteBlockID)
110
- @lexer.pop(NexusParser::Tokens::MesquiteBlockID)
110
+ @lexer.pop(NexusParser::Tokens::MesquiteBlockID)
111
111
  end
112
-
112
+
113
113
  end
114
114
  end
115
115
 
116
116
 
117
117
  end
118
118
 
119
- def parse_characters_blk
120
-
121
- inf = 0
119
+ def parse_characters_blk
120
+
121
+ inf = 0
122
122
  while true
123
123
  inf += 1
124
124
  raise(NexusParser::ParseError,"Either you have a gazillion characters or more likely the parser is caught in an infinite loop trying to parser character data. Check for double single quotes in this block.") if inf > 100000
125
125
 
126
126
  if @lexer.peek(NexusParser::Tokens::EndBlk) # we're at the end of the block, exit after geting rid of the semi-colon
127
- break
127
+ break
128
128
  else
129
129
  @lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title) # not used at present
130
130
  @lexer.pop(NexusParser::Tokens::LinkLine) if @lexer.peek(NexusParser::Tokens::LinkLine) # trashing these for now
131
-
131
+
132
132
  parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)
133
- parse_format if @lexer.peek(NexusParser::Tokens::Format)
134
-
133
+ parse_format if @lexer.peek(NexusParser::Tokens::Format)
134
+
135
135
  parse_chr_state_labels if @lexer.peek(NexusParser::Tokens::CharStateLabels)
136
136
 
137
- parse_matrix if @lexer.peek(NexusParser::Tokens::Matrix)
138
-
137
+ parse_chr_labels if @lexer.peek(NexusParser::Tokens::CharLabels)
138
+
139
+ parse_state_labels if @lexer.peek(NexusParser::Tokens::StateLabels)
140
+
141
+ parse_matrix if @lexer.peek(NexusParser::Tokens::Matrix)
142
+
139
143
  # handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters block (sticks in an infinite loop right now)
140
144
 
141
145
 
142
146
  @lexer.pop(NexusParser::Tokens::MesquiteIDs) if @lexer.peek(NexusParser::Tokens::MesquiteIDs) # trashing these for now
143
147
  @lexer.pop(NexusParser::Tokens::MesquiteBlockID) if @lexer.peek(NexusParser::Tokens::MesquiteBlockID) # trashing these for now
144
-
145
- false
146
148
  end
147
149
  end
148
150
  @lexer.pop(NexusParser::Tokens::EndBlk)
@@ -150,21 +152,23 @@ class NexusParser::Parser
150
152
 
151
153
  # prolly pop header then fuse with parse_dimensions
152
154
  def parse_format
153
- @lexer.pop(NexusParser::Tokens::Format)
154
- while @lexer.peek(NexusParser::Tokens::ValuePair)
155
- @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
155
+ @lexer.pop(NexusParser::Tokens::Format)
156
+
157
+ while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
158
+ @lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
159
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
156
160
  end
157
161
 
158
162
  check_initialization_of_ntax_nchar
159
163
  end
160
164
 
161
- def parse_dimensions
165
+ def parse_dimensions
162
166
  @lexer.pop(NexusParser::Tokens::Dimensions)
163
167
  while @lexer.peek(NexusParser::Tokens::ValuePair)
164
168
  @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
165
169
  end
166
170
  # the last value pair with a ; is automagically handled, don't try popping it again
167
-
171
+
168
172
  check_initialization_of_ntax_nchar
169
173
  end
170
174
 
@@ -173,7 +177,7 @@ class NexusParser::Parser
173
177
  if @builder.nexus_file.vars[:nchar] && @builder.nexus_file.characters == []
174
178
  (0..(@builder.nexus_file.vars[:nchar].to_i - 1)).each {|i| @builder.stub_chr }
175
179
  end
176
-
180
+
177
181
  # check for taxa dimensions, if otherwise not set generate them
178
182
  if @builder.nexus_file.vars[:ntax] && @builder.nexus_file.taxa == []
179
183
  (0..(@builder.nexus_file.vars[:ntax].to_i - 1)).each {|i| @builder.stub_taxon }
@@ -182,45 +186,108 @@ class NexusParser::Parser
182
186
 
183
187
  def parse_chr_state_labels
184
188
  @lexer.pop(NexusParser::Tokens::CharStateLabels)
185
-
186
- inf = 0
189
+
190
+ inf = 0
187
191
  while true
188
192
  inf += 1
189
193
  raise(NexusParser::ParseError,"Either you have a gazillion character state labels or more likely the parser is caught in an infinite loop while trying to parser character state labels. Check for double single quotes in this block.") if inf > 100000
190
194
 
191
- if @lexer.peek(NexusParser::Tokens::SemiColon)
192
- break
195
+ if @lexer.peek(NexusParser::Tokens::SemiColon)
196
+ break
193
197
  else
194
198
  opts = {}
195
-
196
199
  name = ""
197
- index = @lexer.pop(NexusParser::Tokens::Number).value.to_i
198
- (name = @lexer.pop(NexusParser::Tokens::Label).value) if @lexer.peek(NexusParser::Tokens::Label) # not always given a letter
200
+
201
+ index = @lexer.pop(NexusParser::Tokens::PositiveInteger).value.to_i
202
+
203
+ (name = @lexer.pop(NexusParser::Tokens::CharacterLabel).value) if @lexer.peek(NexusParser::Tokens::CharacterLabel) # not always given a letter
199
204
 
200
205
  @lexer.pop(NexusParser::Tokens::BckSlash) if @lexer.peek(NexusParser::Tokens::BckSlash)
201
206
 
202
207
  if !@lexer.peek(NexusParser::Tokens::Comma) || !@lexer.peek(NexusParser::Tokens::SemiColon)
203
208
  i = 0
204
209
 
205
- # three kludge lines, need to figure out the label/number priority, could be issue in list order w/in tokens
206
- while @lexer.peek(NexusParser::Tokens::Label) || @lexer.peek(NexusParser::Tokens::Number)
207
- opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Label).value}) if @lexer.peek(NexusParser::Tokens::Label)
208
- opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Number).value.to_s}) if @lexer.peek(NexusParser::Tokens::Number)
210
+ while @lexer.peek(NexusParser::Tokens::CharacterLabel)
211
+ opts.update({
212
+ i.to_s => @lexer.pop(NexusParser::Tokens::CharacterLabel).value
213
+ })
209
214
 
210
215
  i += 1
211
- end
216
+ end
212
217
  end
213
218
 
214
219
  @lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon
215
-
220
+
216
221
  opts.update({:index => (index - 1), :name => name})
217
-
222
+
218
223
  raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
219
224
  @builder.update_chr(opts)
220
- end
225
+ end
226
+
227
+ end
228
+ @lexer.pop(NexusParser::Tokens::SemiColon)
229
+ end
230
+
231
+ def parse_chr_labels
232
+ @lexer.pop(NexusParser::Tokens::CharLabels)
233
+
234
+ inf = 0
235
+ while true
236
+ inf += 1
237
+ raise(NexusParser::ParseError,"Either you have a gazillion character labels or more likely the parser is caught in an infinite loop while trying to parse character labels. Check for double single quotes in this block.") if inf > 100000
238
+
239
+ if @lexer.peek(NexusParser::Tokens::SemiColon)
240
+ break
241
+ else
242
+ i = 0
243
+ while @lexer.peek(NexusParser::Tokens::CharacterLabel)
244
+ @builder.update_chr_name(
245
+ i, @lexer.pop(NexusParser::Tokens::CharacterLabel).value
246
+ )
247
+
248
+ i += 1
249
+ end
250
+ end
251
+ end
252
+ @lexer.pop(NexusParser::Tokens::SemiColon)
253
+ end
254
+
255
+ def parse_state_labels
256
+ @lexer.pop(NexusParser::Tokens::StateLabels)
257
+
258
+ inf = 0
259
+ while true
260
+ inf += 1
261
+ raise(NexusParser::ParseError,"Either you have a gazillion state labels or more likely the parser is caught in an infinite loop while trying to parse state labels. Check for double single quotes in this block.") if inf > 100000
262
+
263
+ if @lexer.peek(NexusParser::Tokens::SemiColon)
264
+ break
265
+ else
266
+ opts = {}
267
+
268
+ index = @lexer.pop(NexusParser::Tokens::PositiveInteger).value.to_i
269
+
270
+ if !@lexer.peek(NexusParser::Tokens::Comma) && !@lexer.peek(NexusParser::Tokens::SemiColon)
271
+ i = 0
272
+
273
+ while @lexer.peek(NexusParser::Tokens::CharacterLabel)
274
+ opts.update({
275
+ i.to_s => @lexer.pop(NexusParser::Tokens::CharacterLabel).value
276
+ })
277
+
278
+ i += 1
279
+ end
280
+ end
281
+
282
+ @lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon
283
+
284
+ opts.update({:index => (index - 1)})
285
+
286
+ @builder.update_chr_states(opts)
287
+ end
221
288
 
222
289
  end
223
- @lexer.pop(NexusParser::Tokens::SemiColon)
290
+ @lexer.pop(NexusParser::Tokens::SemiColon)
224
291
  end
225
292
 
226
293
  def parse_matrix
@@ -228,25 +295,25 @@ class NexusParser::Parser
228
295
  i = 0
229
296
  while true
230
297
  if @lexer.peek(NexusParser::Tokens::SemiColon)
231
- break
298
+ break
232
299
  else
233
300
  t = @lexer.pop(NexusParser::Tokens::Label).value
234
301
 
235
302
  @builder.update_taxon(:index => i, :name => t) # if it exists its not re-added
236
303
 
237
304
  @builder.code_row(i, @lexer.pop(NexusParser::Tokens::RowVec).value)
238
-
305
+
239
306
  i += 1
240
307
  end
241
308
  end
242
- @lexer.pop(NexusParser::Tokens::SemiColon) # pop the semicolon
309
+ @lexer.pop(NexusParser::Tokens::SemiColon) # pop the semicolon
243
310
  end
244
311
 
245
312
  # this suck(s/ed), it needs work when a better API for Mesquite comes out
246
313
  def parse_notes_blk
247
314
  # IMPORTANT - we don't parse the (CM <note>), we just strip the "(CM" ... ")" bit for now in NexusParser::Note
248
315
 
249
- @vars = {}
316
+ @vars = {}
250
317
  inf = 0 # a crude iteration checker
251
318
  while true
252
319
  inf += 1
@@ -259,18 +326,20 @@ class NexusParser::Parser
259
326
 
260
327
  if @lexer.peek(NexusParser::Tokens::ValuePair)
261
328
  @vars.update(@lexer.pop(NexusParser::Tokens::ValuePair).value)
262
-
263
- elsif @lexer.peek(NexusParser::Tokens::Label)
264
- if @vars[:type] # we have the data for this row write it, and start a new one
265
-
329
+
330
+ elsif @lexer.peek(NexusParser::Tokens::FileLbl)
331
+ @lexer.pop(NexusParser::Tokens::FileLbl)
332
+ @vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
333
+
334
+ else @lexer.peek(NexusParser::Tokens::Label)
335
+ # If we already have a :type set then the Label we just peeked starts a
336
+ # new row, so write the current one and then start a new one.
337
+ if @vars[:type]
266
338
  @builder.add_note(@vars)
267
339
  @vars = {}
268
- else
269
- @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
270
340
  end
271
- elsif @lexer.peek(NexusParser::Tokens::FileLbl)
272
- @lexer.pop(NexusParser::Tokens::FileLbl)
273
- @vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
341
+
342
+ @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
274
343
  end
275
344
  end
276
345
  end
@@ -278,9 +347,9 @@ class NexusParser::Parser
278
347
 
279
348
  #@vars = {}
280
349
  #while true
281
-
282
- # break if @lexer.peek(NexusParser::Tokens::EndBlk)
283
-
350
+
351
+ # break if @lexer.peek(NexusParser::Tokens::EndBlk)
352
+
284
353
  # @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
285
354
 
286
355
  # kludge to get around the funny construct that references file
@@ -291,11 +360,11 @@ class NexusParser::Parser
291
360
 
292
361
  # while true
293
362
 
294
- # meh = @lexer.pop(NexusParser::Tokens::ValuePair)
363
+ # meh = @lexer.pop(NexusParser::Tokens::ValuePair)
295
364
  # @vars.update(meh.value)
296
365
  # break if !@lexer.peek(NexusParser::Tokens::ValuePair)
297
366
  # end
298
- #
367
+ #
299
368
  # @builder.add_note(@vars)
300
369
  # @vars = {}
301
370
  #end
@@ -324,7 +393,7 @@ class NexusParser::Parser
324
393
  # nor this
325
394
  end
326
395
 
327
-
396
+
328
397
  def parse_mesquite_blk
329
398
 
330
399
  end
@@ -333,7 +402,7 @@ class NexusParser::Parser
333
402
 
334
403
  # def parse_children(parent)
335
404
  # parse a comma-separated list of nodes
336
- # while true
405
+ # while true
337
406
  # parse_node(parent)
338
407
  # if @lexer.peek(NexusParser::Tokens::Comma)
339
408
  # @lexer.pop(NexusParser::Tokens::Comma)
@@ -342,7 +411,7 @@ class NexusParser::Parser
342
411
  # end
343
412
  # end
344
413
  # end
345
-
414
+
346
415
  end
347
416
 
348
417