nexus_parser 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93e9b7ced7b53e19e2cd33c72c433736058c4adcf7555dc6b43635c08f5dcea7
4
- data.tar.gz: 4b9bce06037be960e29e1e4f02bbeef286f47a35e977384a5d451e1f0c3e3f91
3
+ metadata.gz: be7f8d6dc4a222f456df1bb18dc3d63182cfb83b88ee036c227a93883c5ff70a
4
+ data.tar.gz: 1ab8785c3ca791476efe19d290ef25f20dc790792cf82fdee4ab1a0cd7468347
5
5
  SHA512:
6
- metadata.gz: 2297128f4ad470e7de1760996aaa712ed8bc691fee0a3cc8a59de54cf1b40bcc9d1cd6478049ae14f93b90ea208a93b6bc4872b76ecf8fc99221ea1b583f954d
7
- data.tar.gz: 19615c9fcdc2469bf32e681ac3b81b1a4ca8f468a0154671573b73c0e54f247d78b075ee3a2a1d63a4ba21c0cd657dfe656f8140b8feed07ca54d93fadf1772a
6
+ metadata.gz: e2c206656a9c8a1760e158641923b47f789ef42156fd9486dd75f70f92db670f31308adf739355eca6192ae3c702f6868b04fcab8ab87e7e44590371b3838bf7
7
+ data.tar.gz: fb4a0f18b0430dc04aa4feebd9d9ea46fb91b8557bc61efe7d75d4ad4427da1a0fa9f0632a3afe074fe5e47c17b18f9cd6a58786833b6c07a7181f819cece0d8
data/.gitignore CHANGED
@@ -13,6 +13,9 @@ tmtags
13
13
  ## VIM
14
14
  *.swp
15
15
 
16
+ ## BYEBUG
17
+ .byebug_history
18
+
16
19
  ## PROJECT::GENERAL
17
20
  coverage
18
21
  rdoc
@@ -151,8 +151,10 @@ class NexusParser::Parser
151
151
  # probably pop header then fuse with parse_dimensions
152
152
  def parse_format
153
153
  @lexer.pop(NexusParser::Tokens::Format)
154
- while @lexer.peek(NexusParser::Tokens::ValuePair)
155
- @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
154
+
155
+ while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
156
+ @lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
157
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
156
158
  end
157
159
 
158
160
  check_initialization_of_ntax_nchar
@@ -1,5 +1,7 @@
1
1
  module NexusParser::Tokens
2
2
 
3
+ ENDBLKSTR = '(end|endblock)'.freeze
4
+
3
5
  class Token
4
6
  # this allows access to the class attribute regexp, without using a class variable
5
7
  class << self; attr_reader :regexp; end
@@ -31,12 +33,12 @@ module NexusParser::Tokens
31
33
  end
32
34
 
33
35
  class EndBlk < Token
34
- @regexp = Regexp.new(/\A\s*([\s]*End[\s]*;[\s]*)/i)
36
+ @regexp = Regexp.new(/\A\s*([\s]*#{ENDBLKSTR}[\s]*;[\s]*)/i)
35
37
  end
36
38
 
37
39
  # label
38
40
  class AuthorsBlk < Token
39
- @regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
41
+ @regexp = Regexp.new(/\A\s*(Authors;.*?#{ENDBLKSTR};)\s*/im)
40
42
  end
41
43
 
42
44
  # label
@@ -66,6 +68,11 @@ module NexusParser::Tokens
66
68
  @regexp = Regexp.new(/\A\s*(format)\s*/i)
67
69
  end
68
70
 
71
+ # TODO: Handled, but ignored
72
+ class RespectCase < Token
73
+ @regexp = Regexp.new(/\A\s*(respectcase)\s*/i)
74
+ end
75
+
69
76
  # label
70
77
  class Taxlabels < Token
71
78
  @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
@@ -133,35 +140,35 @@ module NexusParser::Tokens
133
140
  # unparsed blocks
134
141
 
135
142
  class TreesBlk < Token
136
- @regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
143
+ @regexp = Regexp.new(/\A\s*(trees;.*?#{ENDBLKSTR};)\s*/im) # note the multi-line /m
137
144
  end
138
145
 
139
146
  class SetsBlk < Token
140
- @regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
147
+ @regexp = Regexp.new(/\A\s*(sets;.*?#{ENDBLKSTR};)\s*/im)
141
148
  end
142
149
 
143
150
  class MqCharModelsBlk < Token
144
- @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
151
+ @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?#{ENDBLKSTR};)\s*/im)
145
152
  end
146
153
 
147
154
  class LabelsBlk < Token
148
- @regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
155
+ @regexp = Regexp.new(/\A\s*(LABELS;.*?#{ENDBLKSTR};)\s*/im)
149
156
  end
150
157
 
151
158
  class AssumptionsBlk < Token
152
- @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
159
+ @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?#{ENDBLKSTR};)\s*/im)
153
160
  end
154
161
 
155
162
  class CodonsBlk < Token
156
- @regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
163
+ @regexp = Regexp.new(/\A\s*(CODONS;.*?#{ENDBLKSTR};)\s*/im)
157
164
  end
158
165
 
159
166
  class MesquiteBlk < Token
160
- @regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
167
+ @regexp = Regexp.new(/\A\s*(Mesquite;.*?#{ENDBLKSTR};)\s*/im)
161
168
  end
162
169
 
163
170
  class BlkEnd < Token
164
- @regexp = Regexp.new(/\A[\s]*(END;)\s*/i)
171
+ @regexp = Regexp.new(/\A[\s]*(#{ENDBLKSTR};)\s*/i)
165
172
  end
166
173
 
167
174
  class LBracket < Token
@@ -246,6 +253,7 @@ module NexusParser::Tokens
246
253
  NexusParser::Tokens::Dimensions,
247
254
  NexusParser::Tokens::FileLbl,
248
255
  NexusParser::Tokens::Format,
256
+ NexusParser::Tokens::RespectCase,
249
257
  NexusParser::Tokens::Equals,
250
258
  NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, its a lesser Label)
251
259
  NexusParser::Tokens::CharStateLabels,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module NexusParser
4
- VERSION = "1.2.0"
4
+ VERSION = "1.2.1"
5
5
  end
data/lib/nexus_parser.rb CHANGED
@@ -75,7 +75,7 @@ class NexusParser
75
75
  class Coding
76
76
  # unfortunately we need this for notes
77
77
  attr_accessor :notes
78
- attr_writer :state
78
+ attr_writer :state
79
79
 
80
80
  def initialize(options = {})
81
81
  @states = options[:states]
@@ -85,7 +85,7 @@ class NexusParser
85
85
  def states
86
86
  @states.class == Array ? @states : [@states]
87
87
  end
88
-
88
+
89
89
  end
90
90
 
91
91
  class Note
@@ -270,7 +270,7 @@ def parse_nexus_file(input)
270
270
  @input = input
271
271
  @input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
272
272
  # quickly peek at the input, does this look like a Nexus file?
273
- if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
273
+ if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /(end|endblock)\;/i)
274
274
  raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
275
275
  end
276
276
 
@@ -56,6 +56,12 @@ class Test_Lexer < Test::Unit::TestCase
56
56
  assert lexer2.pop(NexusParser::Tokens::LParen)
57
57
  assert lexer2.pop(NexusParser::Tokens::RParen)
58
58
 
59
+ lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
60
+ assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
61
+ assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
62
+ assert lexer2a.pop(NexusParser::Tokens::LBracket)
63
+ assert lexer2a.pop(NexusParser::Tokens::RBracket)
64
+
59
65
  lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
60
66
  assert lexer3.pop(NexusParser::Tokens::LBracket)
61
67
  assert id = lexer3.pop(NexusParser::Tokens::ID)
@@ -149,7 +155,7 @@ class Test_Lexer < Test::Unit::TestCase
149
155
  def test_EndBlk
150
156
  lexer = NexusParser::Lexer.new(" \n\n End ;")
151
157
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
152
- lexer = NexusParser::Lexer.new("\n\nEnd;")
158
+ lexer = NexusParser::Lexer.new("\n\nEndblock;")
153
159
  assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
154
160
 
155
161
  lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
@@ -401,13 +407,13 @@ class Test_Lexer < Test::Unit::TestCase
401
407
  CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
402
408
 
403
409
 
404
- END;
410
+ ENDBLOCK;
405
411
 
406
412
  BEGIN some other block;")
407
413
 
408
414
  assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
409
415
  assert_equal 'LABELS', foo.value.slice(0,6)
410
- assert_equal 'END;', foo.value.slice(-4,4)
416
+ assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
411
417
  end
412
418
 
413
419
  def test_SetsBlk
@@ -513,8 +519,6 @@ class Test_Parser < Test::Unit::TestCase
513
519
  assert_equal "Tetragnatha", foo.taxa[9].name
514
520
  end
515
521
 
516
-
517
-
518
522
  def test_parse_characters_blk
519
523
  input= "
520
524
  TITLE 'Scharff&Coddington_1997_Araneidae';
@@ -589,7 +593,7 @@ class Test_Parser < Test::Unit::TestCase
589
593
  Tetragnatha 0?01011011
590
594
 
591
595
  ;
592
- END;"
596
+ ENDBLOCK;"
593
597
 
594
598
  builder = NexusParser::Builder.new
595
599
  @lexer = NexusParser::Lexer.new(input)
@@ -655,6 +659,20 @@ class Test_Parser < Test::Unit::TestCase
655
659
  # add test that nothing is left in lexer
656
660
  end
657
661
 
662
+ def test_parse_format_respect_case
663
+ input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
664
+ builder = NexusParser::Builder.new
665
+ lexer = NexusParser::Lexer.new(input)
666
+
667
+ NexusParser::Parser.new(lexer,builder).parse_format
668
+ foo = builder.nexus_file
669
+
670
+ assert_equal "STANDARD", foo.vars[:datatype]
671
+ assert_equal "-", foo.vars[:gap]
672
+ assert_equal "?", foo.vars[:missing]
673
+ assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
674
+ end
675
+
658
676
  def test_parse_chr_state_labels
659
677
  input =" CHARSTATELABELS
660
678
  1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nexus_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - mjy
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2024-04-03 00:00:00.000000000 Z
12
+ date: 2024-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
- rubygems_version: 3.5.3
131
+ rubygems_version: 3.5.9
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: A Nexus file format (phylogenetic inference) parser in Ruby.