nexus_parser 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/lib/nexus_parser/parser.rb +4 -2
- data/lib/nexus_parser/tokens.rb +18 -10
- data/lib/nexus_parser/version.rb +1 -1
- data/lib/nexus_parser.rb +3 -3
- data/test/test_nexus_parser.rb +24 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be7f8d6dc4a222f456df1bb18dc3d63182cfb83b88ee036c227a93883c5ff70a
|
4
|
+
data.tar.gz: 1ab8785c3ca791476efe19d290ef25f20dc790792cf82fdee4ab1a0cd7468347
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2c206656a9c8a1760e158641923b47f789ef42156fd9486dd75f70f92db670f31308adf739355eca6192ae3c702f6868b04fcab8ab87e7e44590371b3838bf7
|
7
|
+
data.tar.gz: fb4a0f18b0430dc04aa4feebd9d9ea46fb91b8557bc61efe7d75d4ad4427da1a0fa9f0632a3afe074fe5e47c17b18f9cd6a58786833b6c07a7181f819cece0d8
|
data/.gitignore
CHANGED
data/lib/nexus_parser/parser.rb
CHANGED
@@ -151,8 +151,10 @@ class NexusParser::Parser
|
|
151
151
|
# probably pop header, then fuse with parse_dimensions
|
152
152
|
def parse_format
|
153
153
|
@lexer.pop(NexusParser::Tokens::Format)
|
154
|
-
|
155
|
-
|
154
|
+
|
155
|
+
while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
|
156
|
+
@lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
|
157
|
+
@builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
|
156
158
|
end
|
157
159
|
|
158
160
|
check_initialization_of_ntax_nchar
|
data/lib/nexus_parser/tokens.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module NexusParser::Tokens
|
2
2
|
|
3
|
+
ENDBLKSTR = '(end|endblock)'.freeze
|
4
|
+
|
3
5
|
class Token
|
4
6
|
# this allows access to the class attribute regexp, without using a class variable
|
5
7
|
class << self; attr_reader :regexp; end
|
@@ -31,12 +33,12 @@ module NexusParser::Tokens
|
|
31
33
|
end
|
32
34
|
|
33
35
|
class EndBlk < Token
|
34
|
-
@regexp = Regexp.new(/\A\s*([\s]
|
36
|
+
@regexp = Regexp.new(/\A\s*([\s]*#{ENDBLKSTR}[\s]*;[\s]*)/i)
|
35
37
|
end
|
36
38
|
|
37
39
|
# label
|
38
40
|
class AuthorsBlk < Token
|
39
|
-
@regexp = Regexp.new(/\A\s*(Authors
|
41
|
+
@regexp = Regexp.new(/\A\s*(Authors;.*?#{ENDBLKSTR};)\s*/im)
|
40
42
|
end
|
41
43
|
|
42
44
|
# label
|
@@ -66,6 +68,11 @@ module NexusParser::Tokens
|
|
66
68
|
@regexp = Regexp.new(/\A\s*(format)\s*/i)
|
67
69
|
end
|
68
70
|
|
71
|
+
# TODO: Handled, but ignored
|
72
|
+
class RespectCase < Token
|
73
|
+
@regexp = Regexp.new(/\A\s*(respectcase)\s*/i)
|
74
|
+
end
|
75
|
+
|
69
76
|
# label
|
70
77
|
class Taxlabels < Token
|
71
78
|
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
@@ -133,35 +140,35 @@ module NexusParser::Tokens
|
|
133
140
|
# unparsed blocks
|
134
141
|
|
135
142
|
class TreesBlk < Token
|
136
|
-
@regexp = Regexp.new(/\A\s*(trees
|
143
|
+
@regexp = Regexp.new(/\A\s*(trees;.*?#{ENDBLKSTR};)\s*/im) # note the multi-line /m
|
137
144
|
end
|
138
145
|
|
139
146
|
class SetsBlk < Token
|
140
|
-
@regexp = Regexp.new(/\A\s*(sets
|
147
|
+
@regexp = Regexp.new(/\A\s*(sets;.*?#{ENDBLKSTR};)\s*/im)
|
141
148
|
end
|
142
149
|
|
143
150
|
class MqCharModelsBlk < Token
|
144
|
-
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS
|
151
|
+
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?#{ENDBLKSTR};)\s*/im)
|
145
152
|
end
|
146
153
|
|
147
154
|
class LabelsBlk < Token
|
148
|
-
@regexp = Regexp.new(/\A\s*(LABELS
|
155
|
+
@regexp = Regexp.new(/\A\s*(LABELS;.*?#{ENDBLKSTR};)\s*/im)
|
149
156
|
end
|
150
157
|
|
151
158
|
class AssumptionsBlk < Token
|
152
|
-
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS
|
159
|
+
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?#{ENDBLKSTR};)\s*/im)
|
153
160
|
end
|
154
161
|
|
155
162
|
class CodonsBlk < Token
|
156
|
-
@regexp = Regexp.new(/\A\s*(CODONS
|
163
|
+
@regexp = Regexp.new(/\A\s*(CODONS;.*?#{ENDBLKSTR};)\s*/im)
|
157
164
|
end
|
158
165
|
|
159
166
|
class MesquiteBlk < Token
|
160
|
-
@regexp = Regexp.new(/\A\s*(Mesquite
|
167
|
+
@regexp = Regexp.new(/\A\s*(Mesquite;.*?#{ENDBLKSTR};)\s*/im)
|
161
168
|
end
|
162
169
|
|
163
170
|
class BlkEnd < Token
|
164
|
-
@regexp = Regexp.new(/\A[\s]*(
|
171
|
+
@regexp = Regexp.new(/\A[\s]*(#{ENDBLKSTR};)\s*/i)
|
165
172
|
end
|
166
173
|
|
167
174
|
class LBracket < Token
|
@@ -246,6 +253,7 @@ module NexusParser::Tokens
|
|
246
253
|
NexusParser::Tokens::Dimensions,
|
247
254
|
NexusParser::Tokens::FileLbl,
|
248
255
|
NexusParser::Tokens::Format,
|
256
|
+
NexusParser::Tokens::RespectCase,
|
249
257
|
NexusParser::Tokens::Equals,
|
250
258
|
NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, it's a lesser Label)
|
251
259
|
NexusParser::Tokens::CharStateLabels,
|
data/lib/nexus_parser/version.rb
CHANGED
data/lib/nexus_parser.rb
CHANGED
@@ -75,7 +75,7 @@ class NexusParser
|
|
75
75
|
class Coding
|
76
76
|
# unfortunately we need this for notes
|
77
77
|
attr_accessor :notes
|
78
|
-
attr_writer :state
|
78
|
+
attr_writer :state
|
79
79
|
|
80
80
|
def initialize(options = {})
|
81
81
|
@states = options[:states]
|
@@ -85,7 +85,7 @@ class NexusParser
|
|
85
85
|
def states
|
86
86
|
@states.class == Array ? @states : [@states]
|
87
87
|
end
|
88
|
-
|
88
|
+
|
89
89
|
end
|
90
90
|
|
91
91
|
class Note
|
@@ -270,7 +270,7 @@ def parse_nexus_file(input)
|
|
270
270
|
@input = input
|
271
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
272
272
|
# quickly peek at the input, does this look like a Nexus file?
|
273
|
-
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
273
|
+
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /(end|endblock)\;/i)
|
274
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
275
275
|
end
|
276
276
|
|
data/test/test_nexus_parser.rb
CHANGED
@@ -56,6 +56,12 @@ class Test_Lexer < Test::Unit::TestCase
|
|
56
56
|
assert lexer2.pop(NexusParser::Tokens::LParen)
|
57
57
|
assert lexer2.pop(NexusParser::Tokens::RParen)
|
58
58
|
|
59
|
+
lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
|
60
|
+
assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
|
61
|
+
assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
|
62
|
+
assert lexer2a.pop(NexusParser::Tokens::LBracket)
|
63
|
+
assert lexer2a.pop(NexusParser::Tokens::RBracket)
|
64
|
+
|
59
65
|
lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
|
60
66
|
assert lexer3.pop(NexusParser::Tokens::LBracket)
|
61
67
|
assert id = lexer3.pop(NexusParser::Tokens::ID)
|
@@ -149,7 +155,7 @@ class Test_Lexer < Test::Unit::TestCase
|
|
149
155
|
def test_EndBlk
|
150
156
|
lexer = NexusParser::Lexer.new(" \n\n End ;")
|
151
157
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
152
|
-
lexer = NexusParser::Lexer.new("\n\
|
158
|
+
lexer = NexusParser::Lexer.new("\n\nEndblock;")
|
153
159
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
154
160
|
|
155
161
|
lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
|
@@ -401,13 +407,13 @@ class Test_Lexer < Test::Unit::TestCase
|
|
401
407
|
CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
|
402
408
|
|
403
409
|
|
404
|
-
|
410
|
+
ENDBLOCK;
|
405
411
|
|
406
412
|
BEGIN some other block;")
|
407
413
|
|
408
414
|
assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
|
409
415
|
assert_equal 'LABELS', foo.value.slice(0,6)
|
410
|
-
assert_equal '
|
416
|
+
assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
|
411
417
|
end
|
412
418
|
|
413
419
|
def test_SetsBlk
|
@@ -513,8 +519,6 @@ class Test_Parser < Test::Unit::TestCase
|
|
513
519
|
assert_equal "Tetragnatha", foo.taxa[9].name
|
514
520
|
end
|
515
521
|
|
516
|
-
|
517
|
-
|
518
522
|
def test_parse_characters_blk
|
519
523
|
input= "
|
520
524
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
@@ -589,7 +593,7 @@ class Test_Parser < Test::Unit::TestCase
|
|
589
593
|
Tetragnatha 0?01011011
|
590
594
|
|
591
595
|
;
|
592
|
-
|
596
|
+
ENDBLOCK;"
|
593
597
|
|
594
598
|
builder = NexusParser::Builder.new
|
595
599
|
@lexer = NexusParser::Lexer.new(input)
|
@@ -655,6 +659,20 @@ class Test_Parser < Test::Unit::TestCase
|
|
655
659
|
# add test that nothing is left in lexer
|
656
660
|
end
|
657
661
|
|
662
|
+
def test_parse_format_respect_case
|
663
|
+
input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
|
664
|
+
builder = NexusParser::Builder.new
|
665
|
+
lexer = NexusParser::Lexer.new(input)
|
666
|
+
|
667
|
+
NexusParser::Parser.new(lexer,builder).parse_format
|
668
|
+
foo = builder.nexus_file
|
669
|
+
|
670
|
+
assert_equal "STANDARD", foo.vars[:datatype]
|
671
|
+
assert_equal "-", foo.vars[:gap]
|
672
|
+
assert_equal "?", foo.vars[:missing]
|
673
|
+
assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
|
674
|
+
end
|
675
|
+
|
658
676
|
def test_parse_chr_state_labels
|
659
677
|
input =" CHARSTATELABELS
|
660
678
|
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nexus_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mjy
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
128
|
- !ruby/object:Gem::Version
|
129
129
|
version: '0'
|
130
130
|
requirements: []
|
131
|
-
rubygems_version: 3.5.
|
131
|
+
rubygems_version: 3.5.9
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: A Nexus file format (phylogenetic inference) parser in Ruby.
|