nexus_parser 1.1.4 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE +25 -17
- data/{README.rdoc → README.md} +7 -5
- data/Rakefile +5 -21
- data/lib/{lexer.rb → nexus_parser/lexer.rb} +17 -17
- data/lib/{parser.rb → nexus_parser/parser.rb} +6 -4
- data/lib/{tokens.rb → nexus_parser/tokens.rb} +45 -37
- data/lib/nexus_parser/version.rb +5 -0
- data/lib/nexus_parser.rb +45 -40
- data/nexus_parser.gemspec +49 -50
- data/test/MX_test_03.nex +3 -3
- data/test/test_nexus_parser.rb +156 -142
- metadata +111 -63
- data/MIT-LICENSE +0 -20
- data/README +0 -13
- data/VERSION +0 -1
- data/init.rb +0 -1
data/lib/nexus_parser.rb
CHANGED
|
@@ -8,12 +8,12 @@
|
|
|
8
8
|
|
|
9
9
|
module NexusParser
|
|
10
10
|
|
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
|
13
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
|
14
|
-
|
|
15
|
-
class NexusParser
|
|
16
|
-
|
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'parser'))
|
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'lexer'))
|
|
14
|
+
|
|
15
|
+
class NexusParser
|
|
16
|
+
|
|
17
17
|
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
|
18
18
|
|
|
19
19
|
def initialize
|
|
@@ -26,7 +26,9 @@ class NexusParser
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
class Character
|
|
29
|
-
attr_accessor
|
|
29
|
+
attr_accessor :states, :notes
|
|
30
|
+
attr_writer :name
|
|
31
|
+
|
|
30
32
|
def initialize
|
|
31
33
|
@name = nil
|
|
32
34
|
@states = {}
|
|
@@ -39,7 +41,7 @@ class NexusParser
|
|
|
39
41
|
:name => ''
|
|
40
42
|
}.merge!(options)
|
|
41
43
|
return false if !@opt[:label]
|
|
42
|
-
|
|
44
|
+
|
|
43
45
|
@states.update(@opt[:label] => ChrState.new(@opt[:name]))
|
|
44
46
|
end
|
|
45
47
|
|
|
@@ -49,7 +51,7 @@ class NexusParser
|
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
def name
|
|
52
|
-
((@name == "") || (@name
|
|
54
|
+
((@name == "") || (@name.nil?)) ? "Undefined" : @name
|
|
53
55
|
end
|
|
54
56
|
end
|
|
55
57
|
|
|
@@ -70,17 +72,20 @@ class NexusParser
|
|
|
70
72
|
end
|
|
71
73
|
end
|
|
72
74
|
|
|
73
|
-
class Coding
|
|
74
|
-
# unfortunately we need this for notes
|
|
75
|
-
attr_accessor :
|
|
75
|
+
class Coding
|
|
76
|
+
# unfortunately we need this for notes
|
|
77
|
+
attr_accessor :notes
|
|
78
|
+
attr_writer :state
|
|
79
|
+
|
|
76
80
|
def initialize(options = {})
|
|
77
81
|
@states = options[:states]
|
|
78
|
-
@notes = []
|
|
82
|
+
@notes = []
|
|
79
83
|
end
|
|
80
84
|
|
|
81
85
|
def states
|
|
82
|
-
@states.class == Array ? @states
|
|
86
|
+
@states.class == Array ? @states : [@states]
|
|
83
87
|
end
|
|
88
|
+
|
|
84
89
|
end
|
|
85
90
|
|
|
86
91
|
class Note
|
|
@@ -98,7 +103,7 @@ class NexusParser
|
|
|
98
103
|
else
|
|
99
104
|
n = 'No text recovered, possible parsing error.'
|
|
100
105
|
end
|
|
101
|
-
|
|
106
|
+
|
|
102
107
|
# THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
|
|
103
108
|
if n[0..2] =~ /\A\s*\(\s*CM\s*/i
|
|
104
109
|
n.strip!
|
|
@@ -107,7 +112,7 @@ class NexusParser
|
|
|
107
112
|
n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
|
|
108
113
|
n.strip!
|
|
109
114
|
n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
|
|
110
|
-
n = n[1..-2] if n[0..0] == '"'
|
|
115
|
+
n = n[1..-2] if n[0..0] == '"'
|
|
111
116
|
end
|
|
112
117
|
n.strip
|
|
113
118
|
end
|
|
@@ -120,11 +125,11 @@ end
|
|
|
120
125
|
class Builder
|
|
121
126
|
|
|
122
127
|
def initialize
|
|
123
|
-
@nf = NexusParser.new
|
|
128
|
+
@nf = NexusParser.new
|
|
124
129
|
end
|
|
125
130
|
|
|
126
131
|
def stub_taxon
|
|
127
|
-
@nf.taxa.push(NexusParser::Taxon.new)
|
|
132
|
+
@nf.taxa.push(NexusParser::Taxon.new)
|
|
128
133
|
return @nf.taxa.size
|
|
129
134
|
end
|
|
130
135
|
|
|
@@ -138,11 +143,11 @@ class Builder
|
|
|
138
143
|
@nf.characters.each_with_index do |c, i|
|
|
139
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
|
140
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
|
141
|
-
|
|
142
|
-
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
|
146
|
+
|
|
147
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
|
143
148
|
|
|
144
149
|
existing_states = @nf.characters[i].state_labels
|
|
145
|
-
|
|
150
|
+
|
|
146
151
|
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
|
147
152
|
new_states.delete("?") # we don't add this to the db
|
|
148
153
|
new_states = new_states - existing_states
|
|
@@ -160,12 +165,12 @@ class Builder
|
|
|
160
165
|
end
|
|
161
166
|
@nf.vars.update(hash)
|
|
162
167
|
end
|
|
163
|
-
|
|
168
|
+
|
|
164
169
|
def update_taxon(options = {})
|
|
165
170
|
@opt = {
|
|
166
171
|
:name => ''
|
|
167
172
|
}.merge!(options)
|
|
168
|
-
return false if !@opt[:index]
|
|
173
|
+
return false if !@opt[:index]
|
|
169
174
|
(@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
|
|
170
175
|
end
|
|
171
176
|
|
|
@@ -177,29 +182,29 @@ class Builder
|
|
|
177
182
|
return false if !@opt[:index]
|
|
178
183
|
|
|
179
184
|
@index = @opt[:index].to_i
|
|
180
|
-
|
|
185
|
+
|
|
181
186
|
# need to create the characters
|
|
182
|
-
|
|
187
|
+
|
|
183
188
|
raise(NexusParser::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
|
184
189
|
|
|
185
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
|
186
|
-
|
|
191
|
+
|
|
187
192
|
@opt.delete(:index)
|
|
188
193
|
@opt.delete(:name)
|
|
189
|
-
|
|
194
|
+
|
|
190
195
|
# the rest have states
|
|
191
196
|
@opt.keys.each do |k|
|
|
192
|
-
|
|
197
|
+
|
|
193
198
|
if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
|
|
194
|
-
|
|
199
|
+
|
|
195
200
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
|
196
201
|
update_state(@index, :index => k, :name => @opt[k])
|
|
197
|
-
|
|
202
|
+
|
|
198
203
|
else # doesn't, create it
|
|
199
204
|
@nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
|
|
200
205
|
end
|
|
201
206
|
end
|
|
202
|
-
|
|
207
|
+
|
|
203
208
|
end
|
|
204
209
|
|
|
205
210
|
def update_state(chr_index, options = {})
|
|
@@ -218,11 +223,11 @@ class Builder
|
|
|
218
223
|
case @opt[:type]
|
|
219
224
|
|
|
220
225
|
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
|
221
|
-
when 'TEXT' # a footnote
|
|
226
|
+
when 'TEXT' # a footnote
|
|
222
227
|
if @opt[:file]
|
|
223
228
|
@nf.notes << NexusParser::Note.new(@opt)
|
|
224
|
-
|
|
225
|
-
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
|
229
|
+
|
|
230
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
|
226
231
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
|
227
232
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
228
233
|
|
|
@@ -230,7 +235,7 @@ class Builder
|
|
|
230
235
|
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
231
236
|
|
|
232
237
|
elsif @opt[:character] && !@opt[:taxon]
|
|
233
|
-
|
|
238
|
+
|
|
234
239
|
@nf.characters[@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
235
240
|
end
|
|
236
241
|
|
|
@@ -244,7 +249,7 @@ class Builder
|
|
|
244
249
|
@nf.characters[@opt[:c].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
245
250
|
end
|
|
246
251
|
end
|
|
247
|
-
|
|
252
|
+
|
|
248
253
|
end
|
|
249
254
|
|
|
250
255
|
def nexus_file
|
|
@@ -264,15 +269,15 @@ end # end module
|
|
|
264
269
|
def parse_nexus_file(input)
|
|
265
270
|
@input = input
|
|
266
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
|
267
|
-
# quickly peek at the input, does this look like a Nexus file?
|
|
268
|
-
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
|
272
|
+
# quickly peek at the input, does this look like a Nexus file?
|
|
273
|
+
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /(end|endblock)\;/i)
|
|
269
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
|
270
275
|
end
|
|
271
276
|
|
|
272
277
|
builder = NexusParser::Builder.new
|
|
273
278
|
lexer = NexusParser::Lexer.new(@input)
|
|
274
279
|
NexusParser::Parser.new(lexer, builder).parse_file
|
|
275
|
-
|
|
276
|
-
return builder.nexus_file
|
|
280
|
+
|
|
281
|
+
return builder.nexus_file
|
|
277
282
|
end
|
|
278
283
|
|
data/nexus_parser.gemspec
CHANGED
|
@@ -1,60 +1,59 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/nexus_parser/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = "nexus_parser"
|
|
7
|
+
spec.version = NexusParser::VERSION
|
|
8
|
+
spec.authors = ["mjy", "kleintom"]
|
|
9
|
+
spec.email = ["diapriid@gmail.com"]
|
|
10
|
+
|
|
11
|
+
spec.summary = "A Nexus file format (phylogenetic inference) parser in Ruby."
|
|
12
|
+
spec.description = "A full featured and extensible Nexus file parser in Ruby."
|
|
13
|
+
spec.homepage = "http://github.com/mjy/nexus_parser"
|
|
14
|
+
|
|
15
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
|
16
|
+
spec.metadata["source_code_uri"] = spec.homepage
|
|
17
|
+
|
|
18
|
+
spec.extra_rdoc_files = [
|
|
16
19
|
"LICENSE",
|
|
17
|
-
|
|
18
|
-
"README.rdoc"
|
|
20
|
+
"README.md"
|
|
19
21
|
]
|
|
20
|
-
|
|
22
|
+
spec.files = [
|
|
21
23
|
".document",
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"test/test.nex",
|
|
39
|
-
"test/test_nexus_parser.rb",
|
|
40
|
-
"uninstall.rb"
|
|
24
|
+
".gitignore",
|
|
25
|
+
"LICENSE",
|
|
26
|
+
"README.md",
|
|
27
|
+
"Rakefile",
|
|
28
|
+
"install.rb",
|
|
29
|
+
"lib/nexus_parser.rb",
|
|
30
|
+
"lib/nexus_parser/lexer.rb",
|
|
31
|
+
"lib/nexus_parser/parser.rb",
|
|
32
|
+
"lib/nexus_parser/tokens.rb",
|
|
33
|
+
"lib/nexus_parser/version.rb",
|
|
34
|
+
"nexus_parser.gemspec",
|
|
35
|
+
"tasks/nexus_parser_tasks.rake",
|
|
36
|
+
"test/MX_test_03.nex",
|
|
37
|
+
"test/test.nex",
|
|
38
|
+
"test/test_nexus_parser.rb",
|
|
39
|
+
"uninstall.rb"
|
|
41
40
|
]
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
41
|
+
|
|
42
|
+
spec.bindir = "exe"
|
|
43
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
44
|
+
spec.rdoc_options = ["--charset=UTF-8"]
|
|
45
|
+
spec.require_paths = ["lib"]
|
|
46
|
+
|
|
47
|
+
spec.test_files = [
|
|
48
48
|
"test/test_nexus_parser.rb"
|
|
49
49
|
]
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
s.specification_version = 3
|
|
51
|
+
spec.required_ruby_version = '>= 3.3.0'
|
|
53
52
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
53
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
|
54
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
|
55
|
+
spec.add_development_dependency 'rdoc', '~> 6.6.2'
|
|
56
|
+
spec.add_development_dependency 'byebug', '~> 11.1'
|
|
57
|
+
spec.add_development_dependency 'test-unit'
|
|
59
58
|
end
|
|
60
59
|
|
data/test/MX_test_03.nex
CHANGED
|
@@ -9,7 +9,7 @@ BEGIN TAXA;
|
|
|
9
9
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
|
10
10
|
DIMENSIONS NTAX=10;
|
|
11
11
|
TAXLABELS
|
|
12
|
-
Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
|
|
12
|
+
Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
|
|
13
13
|
;
|
|
14
14
|
IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
|
|
15
15
|
BLOCKID JC1191fcddc0c4;
|
|
@@ -21,8 +21,8 @@ BEGIN CHARACTERS;
|
|
|
21
21
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
|
22
22
|
DIMENSIONS NCHAR=10;
|
|
23
23
|
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = " 0 1 2 3 4 5 6 7 8 9 A";
|
|
24
|
-
CHARSTATELABELS
|
|
25
|
-
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
|
24
|
+
CHARSTATELABELS
|
|
25
|
+
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
|
26
26
|
MATRIX
|
|
27
27
|
Dictyna 0?00201001
|
|
28
28
|
Uloborus 0?11000000
|