nexus_parser 1.1.4 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE +25 -17
- data/{README.rdoc → README.md} +7 -5
- data/Rakefile +5 -21
- data/lib/{lexer.rb → nexus_parser/lexer.rb} +17 -17
- data/lib/{parser.rb → nexus_parser/parser.rb} +6 -4
- data/lib/{tokens.rb → nexus_parser/tokens.rb} +45 -37
- data/lib/nexus_parser/version.rb +5 -0
- data/lib/nexus_parser.rb +45 -40
- data/nexus_parser.gemspec +49 -50
- data/test/MX_test_03.nex +3 -3
- data/test/test_nexus_parser.rb +156 -142
- metadata +111 -63
- data/MIT-LICENSE +0 -20
- data/README +0 -13
- data/VERSION +0 -1
- data/init.rb +0 -1
data/lib/nexus_parser.rb
CHANGED
@@ -8,12 +8,12 @@
|
|
8
8
|
|
9
9
|
module NexusParser
|
10
10
|
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
13
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
14
|
-
|
15
|
-
class NexusParser
|
16
|
-
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'parser'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'lexer'))
|
14
|
+
|
15
|
+
class NexusParser
|
16
|
+
|
17
17
|
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
18
18
|
|
19
19
|
def initialize
|
@@ -26,7 +26,9 @@ class NexusParser
|
|
26
26
|
end
|
27
27
|
|
28
28
|
class Character
|
29
|
-
attr_accessor
|
29
|
+
attr_accessor :states, :notes
|
30
|
+
attr_writer :name
|
31
|
+
|
30
32
|
def initialize
|
31
33
|
@name = nil
|
32
34
|
@states = {}
|
@@ -39,7 +41,7 @@ class NexusParser
|
|
39
41
|
:name => ''
|
40
42
|
}.merge!(options)
|
41
43
|
return false if !@opt[:label]
|
42
|
-
|
44
|
+
|
43
45
|
@states.update(@opt[:label] => ChrState.new(@opt[:name]))
|
44
46
|
end
|
45
47
|
|
@@ -49,7 +51,7 @@ class NexusParser
|
|
49
51
|
end
|
50
52
|
|
51
53
|
def name
|
52
|
-
((@name == "") || (@name
|
54
|
+
((@name == "") || (@name.nil?)) ? "Undefined" : @name
|
53
55
|
end
|
54
56
|
end
|
55
57
|
|
@@ -70,17 +72,20 @@ class NexusParser
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
class Coding
|
74
|
-
# unfortunately we need this for notes
|
75
|
-
attr_accessor :
|
75
|
+
class Coding
|
76
|
+
# unfortunately we need this for notes
|
77
|
+
attr_accessor :notes
|
78
|
+
attr_writer :state
|
79
|
+
|
76
80
|
def initialize(options = {})
|
77
81
|
@states = options[:states]
|
78
|
-
@notes = []
|
82
|
+
@notes = []
|
79
83
|
end
|
80
84
|
|
81
85
|
def states
|
82
|
-
@states.class == Array ? @states
|
86
|
+
@states.class == Array ? @states : [@states]
|
83
87
|
end
|
88
|
+
|
84
89
|
end
|
85
90
|
|
86
91
|
class Note
|
@@ -98,7 +103,7 @@ class NexusParser
|
|
98
103
|
else
|
99
104
|
n = 'No text recovered, possible parsing error.'
|
100
105
|
end
|
101
|
-
|
106
|
+
|
102
107
|
# THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
|
103
108
|
if n[0..2] =~ /\A\s*\(\s*CM\s*/i
|
104
109
|
n.strip!
|
@@ -107,7 +112,7 @@ class NexusParser
|
|
107
112
|
n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
|
108
113
|
n.strip!
|
109
114
|
n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
|
110
|
-
n = n[1..-2] if n[0..0] == '"'
|
115
|
+
n = n[1..-2] if n[0..0] == '"'
|
111
116
|
end
|
112
117
|
n.strip
|
113
118
|
end
|
@@ -120,11 +125,11 @@ end
|
|
120
125
|
class Builder
|
121
126
|
|
122
127
|
def initialize
|
123
|
-
@nf = NexusParser.new
|
128
|
+
@nf = NexusParser.new
|
124
129
|
end
|
125
130
|
|
126
131
|
def stub_taxon
|
127
|
-
@nf.taxa.push(NexusParser::Taxon.new)
|
132
|
+
@nf.taxa.push(NexusParser::Taxon.new)
|
128
133
|
return @nf.taxa.size
|
129
134
|
end
|
130
135
|
|
@@ -138,11 +143,11 @@ class Builder
|
|
138
143
|
@nf.characters.each_with_index do |c, i|
|
139
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
140
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
141
|
-
|
142
|
-
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
146
|
+
|
147
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
143
148
|
|
144
149
|
existing_states = @nf.characters[i].state_labels
|
145
|
-
|
150
|
+
|
146
151
|
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
147
152
|
new_states.delete("?") # we don't add this to the db
|
148
153
|
new_states = new_states - existing_states
|
@@ -160,12 +165,12 @@ class Builder
|
|
160
165
|
end
|
161
166
|
@nf.vars.update(hash)
|
162
167
|
end
|
163
|
-
|
168
|
+
|
164
169
|
def update_taxon(options = {})
|
165
170
|
@opt = {
|
166
171
|
:name => ''
|
167
172
|
}.merge!(options)
|
168
|
-
return false if !@opt[:index]
|
173
|
+
return false if !@opt[:index]
|
169
174
|
(@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
|
170
175
|
end
|
171
176
|
|
@@ -177,29 +182,29 @@ class Builder
|
|
177
182
|
return false if !@opt[:index]
|
178
183
|
|
179
184
|
@index = @opt[:index].to_i
|
180
|
-
|
185
|
+
|
181
186
|
# need to create the characters
|
182
|
-
|
187
|
+
|
183
188
|
raise(NexusParser::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
184
189
|
|
185
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
186
|
-
|
191
|
+
|
187
192
|
@opt.delete(:index)
|
188
193
|
@opt.delete(:name)
|
189
|
-
|
194
|
+
|
190
195
|
# the rest have states
|
191
196
|
@opt.keys.each do |k|
|
192
|
-
|
197
|
+
|
193
198
|
if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
|
194
|
-
|
199
|
+
|
195
200
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
196
201
|
update_state(@index, :index => k, :name => @opt[k])
|
197
|
-
|
202
|
+
|
198
203
|
else # doesn't, create it
|
199
204
|
@nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
|
200
205
|
end
|
201
206
|
end
|
202
|
-
|
207
|
+
|
203
208
|
end
|
204
209
|
|
205
210
|
def update_state(chr_index, options = {})
|
@@ -218,11 +223,11 @@ class Builder
|
|
218
223
|
case @opt[:type]
|
219
224
|
|
220
225
|
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
221
|
-
when 'TEXT' # a footnote
|
226
|
+
when 'TEXT' # a footnote
|
222
227
|
if @opt[:file]
|
223
228
|
@nf.notes << NexusParser::Note.new(@opt)
|
224
|
-
|
225
|
-
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
229
|
+
|
230
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
226
231
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
227
232
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
228
233
|
|
@@ -230,7 +235,7 @@ class Builder
|
|
230
235
|
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusParser::Note.new(@opt)
|
231
236
|
|
232
237
|
elsif @opt[:character] && !@opt[:taxon]
|
233
|
-
|
238
|
+
|
234
239
|
@nf.characters[@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
235
240
|
end
|
236
241
|
|
@@ -244,7 +249,7 @@ class Builder
|
|
244
249
|
@nf.characters[@opt[:c].to_i - 1].notes << NexusParser::Note.new(@opt)
|
245
250
|
end
|
246
251
|
end
|
247
|
-
|
252
|
+
|
248
253
|
end
|
249
254
|
|
250
255
|
def nexus_file
|
@@ -264,15 +269,15 @@ end # end module
|
|
264
269
|
def parse_nexus_file(input)
|
265
270
|
@input = input
|
266
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
267
|
-
# quickly peek at the input, does this look like a Nexus file?
|
268
|
-
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
272
|
+
# quickly peek at the input, does this look like a Nexus file?
|
273
|
+
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /(end|endblock)\;/i)
|
269
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
270
275
|
end
|
271
276
|
|
272
277
|
builder = NexusParser::Builder.new
|
273
278
|
lexer = NexusParser::Lexer.new(@input)
|
274
279
|
NexusParser::Parser.new(lexer, builder).parse_file
|
275
|
-
|
276
|
-
return builder.nexus_file
|
280
|
+
|
281
|
+
return builder.nexus_file
|
277
282
|
end
|
278
283
|
|
data/nexus_parser.gemspec
CHANGED
@@ -1,60 +1,59 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/nexus_parser/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "nexus_parser"
|
7
|
+
spec.version = NexusParser::VERSION
|
8
|
+
spec.authors = ["mjy", "kleintom"]
|
9
|
+
spec.email = ["diapriid@gmail.com"]
|
10
|
+
|
11
|
+
spec.summary = "A Nexus file format (phylogenetic inference) parser in Ruby."
|
12
|
+
spec.description = "A full featured and extensible Nexus file parser in Ruby."
|
13
|
+
spec.homepage = "http://github.com/mjy/nexus_parser"
|
14
|
+
|
15
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
16
|
+
spec.metadata["source_code_uri"] = spec.homepage
|
17
|
+
|
18
|
+
spec.extra_rdoc_files = [
|
16
19
|
"LICENSE",
|
17
|
-
|
18
|
-
"README.rdoc"
|
20
|
+
"README.md"
|
19
21
|
]
|
20
|
-
|
22
|
+
spec.files = [
|
21
23
|
".document",
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
"test/test.nex",
|
39
|
-
"test/test_nexus_parser.rb",
|
40
|
-
"uninstall.rb"
|
24
|
+
".gitignore",
|
25
|
+
"LICENSE",
|
26
|
+
"README.md",
|
27
|
+
"Rakefile",
|
28
|
+
"install.rb",
|
29
|
+
"lib/nexus_parser.rb",
|
30
|
+
"lib/nexus_parser/lexer.rb",
|
31
|
+
"lib/nexus_parser/parser.rb",
|
32
|
+
"lib/nexus_parser/tokens.rb",
|
33
|
+
"lib/nexus_parser/version.rb",
|
34
|
+
"nexus_parser.gemspec",
|
35
|
+
"tasks/nexus_parser_tasks.rake",
|
36
|
+
"test/MX_test_03.nex",
|
37
|
+
"test/test.nex",
|
38
|
+
"test/test_nexus_parser.rb",
|
39
|
+
"uninstall.rb"
|
41
40
|
]
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
41
|
+
|
42
|
+
spec.bindir = "exe"
|
43
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
44
|
+
spec.rdoc_options = ["--charset=UTF-8"]
|
45
|
+
spec.require_paths = ["lib"]
|
46
|
+
|
47
|
+
spec.test_files = [
|
48
48
|
"test/test_nexus_parser.rb"
|
49
49
|
]
|
50
50
|
|
51
|
-
|
52
|
-
s.specification_version = 3
|
51
|
+
spec.required_ruby_version = '>= 3.3.0'
|
53
52
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
53
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
54
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
55
|
+
spec.add_development_dependency 'rdoc', '~> 6.6.2'
|
56
|
+
spec.add_development_dependency 'byebug', '~> 11.1'
|
57
|
+
spec.add_development_dependency 'test-unit'
|
59
58
|
end
|
60
59
|
|
data/test/MX_test_03.nex
CHANGED
@@ -9,7 +9,7 @@ BEGIN TAXA;
|
|
9
9
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
10
10
|
DIMENSIONS NTAX=10;
|
11
11
|
TAXLABELS
|
12
|
-
Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
|
12
|
+
Dictyna Uloborus Deinopis Nephila&Herennia 'Nephilengys_cruentata' Meta Leucauge_venusta Pachygnatha 'Theridiosoma_01' Tetragnatha
|
13
13
|
;
|
14
14
|
IDS JC1191fcddc2b128 JC1191fcddc2b129 JC1191fcddc2b130 JC1191fcddc2b131 JC1191fcddc2b132 JC1191fcddc2b133 JC1191fcddc2b134 JC1191fcddc2b135 JC1191fcddc2b137 JC1191fcddc2b136 ;
|
15
15
|
BLOCKID JC1191fcddc0c4;
|
@@ -21,8 +21,8 @@ BEGIN CHARACTERS;
|
|
21
21
|
TITLE 'Scharff&Coddington_1997_Araneidae';
|
22
22
|
DIMENSIONS NCHAR=10;
|
23
23
|
FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = " 0 1 2 3 4 5 6 7 8 9 A";
|
24
|
-
CHARSTATELABELS
|
25
|
-
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
24
|
+
CHARSTATELABELS
|
25
|
+
1 Tibia_II / norm modified, 2 TII_macrosetae / '= TI' stronger, 3 Femoral_tuber / abs pres 'm-setae', 5 Cymbium / dorsal mesal lateral, 6 Paracymbium / abs pres, 7 Globular_tegulum / abs pres, 8 / entire w_lobe, 9 Conductor_wraps_embolus, 10 Median_apophysis / pres abs ;
|
26
26
|
MATRIX
|
27
27
|
Dictyna 0?00201001
|
28
28
|
Uloborus 0?11000000
|