nexus_parser 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/lib/nexus_parser/parser.rb +4 -2
- data/lib/nexus_parser/tokens.rb +18 -10
- data/lib/nexus_parser/version.rb +1 -1
- data/lib/nexus_parser.rb +3 -3
- data/test/test_nexus_parser.rb +24 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be7f8d6dc4a222f456df1bb18dc3d63182cfb83b88ee036c227a93883c5ff70a
|
4
|
+
data.tar.gz: 1ab8785c3ca791476efe19d290ef25f20dc790792cf82fdee4ab1a0cd7468347
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2c206656a9c8a1760e158641923b47f789ef42156fd9486dd75f70f92db670f31308adf739355eca6192ae3c702f6868b04fcab8ab87e7e44590371b3838bf7
|
7
|
+
data.tar.gz: fb4a0f18b0430dc04aa4feebd9d9ea46fb91b8557bc61efe7d75d4ad4427da1a0fa9f0632a3afe074fe5e47c17b18f9cd6a58786833b6c07a7181f819cece0d8
|
data/.gitignore
CHANGED
data/lib/nexus_parser/parser.rb
CHANGED
@@ -151,8 +151,10 @@ class NexusParser::Parser
|
|
151
151
|
# probably pop header, then fuse with parse_dimensions
|
152
152
|
def parse_format
|
153
153
|
@lexer.pop(NexusParser::Tokens::Format)
|
154
|
-
|
155
|
-
|
154
|
+
|
155
|
+
while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
|
156
|
+
@lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
|
157
|
+
@builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
|
156
158
|
end
|
157
159
|
|
158
160
|
check_initialization_of_ntax_nchar
|
data/lib/nexus_parser/tokens.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module NexusParser::Tokens
|
2
2
|
|
3
|
+
ENDBLKSTR = '(end|endblock)'.freeze
|
4
|
+
|
3
5
|
class Token
|
4
6
|
# this allows access to the class attribute regexp, without using a class variable
|
5
7
|
class << self; attr_reader :regexp; end
|
@@ -31,12 +33,12 @@ module NexusParser::Tokens
|
|
31
33
|
end
|
32
34
|
|
33
35
|
class EndBlk < Token
|
34
|
-
@regexp = Regexp.new(/\A\s*([\s]
|
36
|
+
@regexp = Regexp.new(/\A\s*([\s]*#{ENDBLKSTR}[\s]*;[\s]*)/i)
|
35
37
|
end
|
36
38
|
|
37
39
|
# label
|
38
40
|
class AuthorsBlk < Token
|
39
|
-
@regexp = Regexp.new(/\A\s*(Authors
|
41
|
+
@regexp = Regexp.new(/\A\s*(Authors;.*?#{ENDBLKSTR};)\s*/im)
|
40
42
|
end
|
41
43
|
|
42
44
|
# label
|
@@ -66,6 +68,11 @@ module NexusParser::Tokens
|
|
66
68
|
@regexp = Regexp.new(/\A\s*(format)\s*/i)
|
67
69
|
end
|
68
70
|
|
71
|
+
# TODO: Handled, but ignored
|
72
|
+
class RespectCase < Token
|
73
|
+
@regexp = Regexp.new(/\A\s*(respectcase)\s*/i)
|
74
|
+
end
|
75
|
+
|
69
76
|
# label
|
70
77
|
class Taxlabels < Token
|
71
78
|
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
@@ -133,35 +140,35 @@ module NexusParser::Tokens
|
|
133
140
|
# unparsed blocks
|
134
141
|
|
135
142
|
class TreesBlk < Token
|
136
|
-
@regexp = Regexp.new(/\A\s*(trees
|
143
|
+
@regexp = Regexp.new(/\A\s*(trees;.*?#{ENDBLKSTR};)\s*/im) # note the multi-line /m
|
137
144
|
end
|
138
145
|
|
139
146
|
class SetsBlk < Token
|
140
|
-
@regexp = Regexp.new(/\A\s*(sets
|
147
|
+
@regexp = Regexp.new(/\A\s*(sets;.*?#{ENDBLKSTR};)\s*/im)
|
141
148
|
end
|
142
149
|
|
143
150
|
class MqCharModelsBlk < Token
|
144
|
-
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS
|
151
|
+
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?#{ENDBLKSTR};)\s*/im)
|
145
152
|
end
|
146
153
|
|
147
154
|
class LabelsBlk < Token
|
148
|
-
@regexp = Regexp.new(/\A\s*(LABELS
|
155
|
+
@regexp = Regexp.new(/\A\s*(LABELS;.*?#{ENDBLKSTR};)\s*/im)
|
149
156
|
end
|
150
157
|
|
151
158
|
class AssumptionsBlk < Token
|
152
|
-
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS
|
159
|
+
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?#{ENDBLKSTR};)\s*/im)
|
153
160
|
end
|
154
161
|
|
155
162
|
class CodonsBlk < Token
|
156
|
-
@regexp = Regexp.new(/\A\s*(CODONS
|
163
|
+
@regexp = Regexp.new(/\A\s*(CODONS;.*?#{ENDBLKSTR};)\s*/im)
|
157
164
|
end
|
158
165
|
|
159
166
|
class MesquiteBlk < Token
|
160
|
-
@regexp = Regexp.new(/\A\s*(Mesquite
|
167
|
+
@regexp = Regexp.new(/\A\s*(Mesquite;.*?#{ENDBLKSTR};)\s*/im)
|
161
168
|
end
|
162
169
|
|
163
170
|
class BlkEnd < Token
|
164
|
-
@regexp = Regexp.new(/\A[\s]*(
|
171
|
+
@regexp = Regexp.new(/\A[\s]*(#{ENDBLKSTR};)\s*/i)
|
165
172
|
end
|
166
173
|
|
167
174
|
class LBracket < Token
|
@@ -246,6 +253,7 @@ module NexusParser::Tokens
|
|
246
253
|
NexusParser::Tokens::Dimensions,
|
247
254
|
NexusParser::Tokens::FileLbl,
|
248
255
|
NexusParser::Tokens::Format,
|
256
|
+
NexusParser::Tokens::RespectCase,
|
249
257
|
NexusParser::Tokens::Equals,
|
250
258
|
NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, its a lesser Label)
|
251
259
|
NexusParser::Tokens::CharStateLabels,
|
data/lib/nexus_parser/version.rb
CHANGED
data/lib/nexus_parser.rb
CHANGED
@@ -75,7 +75,7 @@ class NexusParser
|
|
75
75
|
class Coding
|
76
76
|
# unfortunately we need this for notes
|
77
77
|
attr_accessor :notes
|
78
|
-
attr_writer :state
|
78
|
+
attr_writer :state
|
79
79
|
|
80
80
|
def initialize(options = {})
|
81
81
|
@states = options[:states]
|
@@ -85,7 +85,7 @@ class NexusParser
|
|
85
85
|
def states
|
86
86
|
@states.class == Array ? @states : [@states]
|
87
87
|
end
|
88
|
-
|
88
|
+
|
89
89
|
end
|
90
90
|
|
91
91
|
class Note
|
@@ -270,7 +270,7 @@ def parse_nexus_file(input)
|
|
270
270
|
@input = input
|
271
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
272
272
|
# quickly peek at the input, does this look like a Nexus file?
|
273
|
-
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
273
|
+
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /(end|endblock)\;/i)
|
274
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
275
275
|
end
|
276
276
|
|
data/test/test_nexus_parser.rb
CHANGED
@@ -56,6 +56,12 @@ class Test_Lexer < Test::Unit::TestCase
|
|
56
56
|
assert lexer2.pop(NexusParser::Tokens::LParen)
|
57
57
|
assert lexer2.pop(NexusParser::Tokens::RParen)
|
58
58
|
|
59
|
+
lexer2a = NexusParser::Lexer.new("begin authors; BLORF endblock; []")
|
60
|
+
assert lexer2a.pop(NexusParser::Tokens::BeginBlk)
|
61
|
+
assert lexer2a.pop(NexusParser::Tokens::AuthorsBlk)
|
62
|
+
assert lexer2a.pop(NexusParser::Tokens::LBracket)
|
63
|
+
assert lexer2a.pop(NexusParser::Tokens::RBracket)
|
64
|
+
|
59
65
|
lexer3 = NexusParser::Lexer.new("[ foo ] Begin Characters; BLORF end; [] () some crud here")
|
60
66
|
assert lexer3.pop(NexusParser::Tokens::LBracket)
|
61
67
|
assert id = lexer3.pop(NexusParser::Tokens::ID)
|
@@ -149,7 +155,7 @@ class Test_Lexer < Test::Unit::TestCase
|
|
149
155
|
def test_EndBlk
|
150
156
|
lexer = NexusParser::Lexer.new(" \n\n End ;")
|
151
157
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
152
|
-
lexer = NexusParser::Lexer.new("\n\
|
158
|
+
lexer = NexusParser::Lexer.new("\n\nEndblock;")
|
153
159
|
assert foo = lexer.pop(NexusParser::Tokens::EndBlk)
|
154
160
|
|
155
161
|
lexer = NexusParser::Lexer.new("123123 \n\nEnd;")
|
@@ -401,13 +407,13 @@ class Test_Lexer < Test::Unit::TestCase
|
|
401
407
|
CHARGROUPLABEL Behavior COLOR = (RGB 1.0 0.46666667 1.0) ;
|
402
408
|
|
403
409
|
|
404
|
-
|
410
|
+
ENDBLOCK;
|
405
411
|
|
406
412
|
BEGIN some other block;")
|
407
413
|
|
408
414
|
assert foo = lexer.pop(NexusParser::Tokens::LabelsBlk)
|
409
415
|
assert_equal 'LABELS', foo.value.slice(0,6)
|
410
|
-
assert_equal '
|
416
|
+
assert_equal 'ENDBLOCK;', foo.value.slice(-9,9)
|
411
417
|
end
|
412
418
|
|
413
419
|
def test_SetsBlk
|
@@ -513,8 +519,6 @@ class Test_Parser < Test::Unit::TestCase
|
|
513
519
|
assert_equal "Tetragnatha", foo.taxa[9].name
|
514
520
|
end
|
515
521
|
|
516
|
-
|
517
|
-
|
518
522
|
def test_parse_characters_blk
|
519
523
|
input= "
|
520
524
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
@@ -589,7 +593,7 @@ class Test_Parser < Test::Unit::TestCase
|
|
589
593
|
Tetragnatha 0?01011011
|
590
594
|
|
591
595
|
;
|
592
|
-
|
596
|
+
ENDBLOCK;"
|
593
597
|
|
594
598
|
builder = NexusParser::Builder.new
|
595
599
|
@lexer = NexusParser::Lexer.new(input)
|
@@ -655,6 +659,20 @@ class Test_Parser < Test::Unit::TestCase
|
|
655
659
|
# add test that nothing is left in lexer
|
656
660
|
end
|
657
661
|
|
662
|
+
def test_parse_format_respect_case
|
663
|
+
input = "FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9 A\";"
|
664
|
+
builder = NexusParser::Builder.new
|
665
|
+
lexer = NexusParser::Lexer.new(input)
|
666
|
+
|
667
|
+
NexusParser::Parser.new(lexer,builder).parse_format
|
668
|
+
foo = builder.nexus_file
|
669
|
+
|
670
|
+
assert_equal "STANDARD", foo.vars[:datatype]
|
671
|
+
assert_equal "-", foo.vars[:gap]
|
672
|
+
assert_equal "?", foo.vars[:missing]
|
673
|
+
assert_equal '0 1 2 3 4 5 6 7 8 9 A', foo.vars[:symbols]
|
674
|
+
end
|
675
|
+
|
658
676
|
def test_parse_chr_state_labels
|
659
677
|
input =" CHARSTATELABELS
|
660
678
|
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nexus_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mjy
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-
|
12
|
+
date: 2024-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
128
|
- !ruby/object:Gem::Version
|
129
129
|
version: '0'
|
130
130
|
requirements: []
|
131
|
-
rubygems_version: 3.5.
|
131
|
+
rubygems_version: 3.5.9
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: A Nexus file format (phylogenetic inference) parser in Ruby.
|