nexus_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/MIT-LICENSE +20 -0
- data/README +13 -0
- data/README.rdoc +17 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/install.rb +1 -0
- data/lib/lexer.rb +66 -0
- data/lib/nexus_file.rb +282 -0
- data/lib/parser.rb +334 -0
- data/lib/tokens.rb +269 -0
- data/tasks/nexus_parser_tasks.rake +4 -0
- data/test/MX_test_03.nex +234 -0
- data/test/test.nex +382 -0
- data/test/test_nexus_parser.rb +937 -0
- data/uninstall.rb +1 -0
- metadata +82 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 mjy
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 [name of plugin creator]
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= nexus_parser
|
2
|
+
|
3
|
+
A Nexus file format (phylogenetic inference) parser in Ruby.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 mjy. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Rakefile for nexus_parser: gem packaging (Jeweler), unit tests, coverage
# (RCov) and RDoc generation. Optional tools are loaded defensively so that a
# missing one does not take the other tasks down with it.
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: without it only a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "nexus_parser"
    gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
    gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
    gem.email = "diapriid@gmail.com"
    gem.homepage = "http://github.com/mjy/nexus_parser"
    gem.authors = ["mjy"]
    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; when RCov is missing, `rake rcov` aborts with install advice.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from the Jeweler tasks above -- confirm). Only depend on it when it
# exists, otherwise `rake test` would fail with an unknown-task error.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# RDoc task. Prefer RDoc's own task class and fall back to the legacy Rake one,
# so the Rakefile loads with either library layout.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline a VERSION file may carry so the title stays on one line
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "nexus_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Include hook code here
|
data/install.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Install hook code here
|
data/lib/lexer.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Turns a Nexus source string into a stream of token objects.
#
# Token classes are expected to respond to +regexp+ and to be constructible
# from the first capture group of a match (see #match). The lexer keeps at most
# one token of lookahead in @next_token.
class NexusFile::Lexer

  # input:: the String to tokenize.
  #
  # The lexer works on its own copy with carriage returns (possible DOS line
  # endings) removed, so the caller's string is never modified and frozen
  # strings are accepted.
  def initialize(input)
    @input = input.gsub(/\x0D/, "")
    @next_token = nil
  end

  # Checks whether the next token is of the specified class.
  # The token is read (and cached) but not consumed.
  # Returns true or false; at end of input the answer is always false.
  def peek(token_class)
    token = read_next_token(token_class)
    token.class == token_class
  end

  # Returns (and deletes) the next token from the input stream.
  # Raises NexusFile::ParseError if the next token is not of the given class.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil # the lookahead is consumed whether or not it matches
    if token.class != token_class
      raise(NexusFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
    end
    token
  end

  private

  # Reads (and stores) the next token from the input, if it has not already
  # been read. Returns nil at end of input; raises NexusFile::ParseError when
  # input remains but no token class matches it.
  def read_next_token(token_class)
    return @next_token if @next_token

    # check for a match on the specified class first
    return @next_token if match(token_class)

    # now check all the tokens for a match
    NexusFile::Tokens.nexus_file_token_list.each do |t|
      return @next_token if match(t)
    end

    # no match, either end of string or lex-error
    return nil if @input == ''
    raise(NexusFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
  end

  # Tries token_class's regexp against the remaining input. On success the
  # token (built from capture group 1) is cached in @next_token, everything up
  # to the end of the match is dropped from the input, and true is returned.
  # NOTE(review): dropping text up to m.end(0) presumes the token regexps are
  # anchored at the start of the input -- confirm in tokens.rb.
  def match(token_class)
    m = token_class.regexp.match(@input)
    return false unless m

    @next_token = token_class.new(m[1])
    @input = @input[m.end(0)..-1]
    true
  end
end
|
64
|
+
|
65
|
+
|
66
|
+
|
data/lib/nexus_file.rb
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
# NexusParser
|
2
|
+
|
3
|
+
# version 0.3 by Matt Yoder
|
4
|
+
# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
|
5
|
+
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
|
6
|
+
|
7
|
+
# outstanding issues:
|
8
|
+
## need to resolve Tokens Labels, ValuePair, IDs
|
9
|
+
|
10
|
+
module NexusFile
|
11
|
+
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
14
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
15
|
+
|
16
|
+
# In-memory result of parsing a Nexus file: taxa, characters, the coding
# matrix, notes, and a hash of miscellaneous variables.
class NexusFile

  attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes

  def initialize
    @taxa = []
    @characters = []
    @sets = []
    @codings = []
    @notes = []
    @vars = {}
  end

  # A character (matrix column) with its states keyed by state label.
  class Character
    # The reader for name is custom (see #name), so only the writer is generated.
    attr_writer :name
    attr_accessor :states, :notes

    def initialize
      @name = nil
      @states = {}
      @notes = []
    end

    # Adds (or replaces) a state.
    # options:: requires :label; :name is optional and defaults to ''.
    # Returns false when :label is missing, otherwise the updated states hash.
    def add_state(options = {})
      opt = { :name => '' }.merge(options)
      return false unless opt[:label]

      @states.update(opt[:label] => ChrState.new(opt[:name]))
    end

    # The state labels in sorted order.
    def state_labels
      @states.keys.sort
    end

    # The character name, or "Undefined" when none (nil or "") has been set.
    def name
      (@name.nil? || @name == "") ? "Undefined" : @name
    end
  end

  # A taxon (matrix row).
  class Taxon
    attr_accessor :name, :mesq_id, :notes

    def initialize
      @name = ""
      @mesq_id = ""
      @notes = []
    end
  end

  # A character state; its label is stored as the key in Character#states.
  class ChrState
    attr_accessor :name, :notes

    def initialize(name)
      @name = name
      # start with an empty list like every other notes holder in this file,
      # so callers can append without a nil check
      @notes = []
    end
  end

  # A single cell of the matrix. It is an object rather than a bare value
  # because notes can be attached to it.
  class Coding
    # The reader for states is custom (see #states), so only the writer is generated.
    attr_writer :states
    attr_accessor :notes

    # options:: :states is a single state or an Array of states.
    def initialize(options = {})
      @states = options[:states]
      @notes = []
    end

    # Always returns an Array; a single state (including nil) is wrapped.
    def states
      @states.is_a?(Array) ? @states : [@states]
    end
  end

  # A footnote or annotation; the raw key/value pairs are kept in vars.
  class Note
    attr_accessor :vars

    def initialize(options = {})
      @vars = options
    end

    # The note text: vars[:tf] if set, else vars[:text], else a fallback
    # message. The stored values are never modified.
    def note
      n = (@vars[:tf] || @vars[:text] || 'No text recovered, possible parsing error.').to_s

      # THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
      # The whole string is tested (not just its first three characters), so
      # whitespace before "(" or between "(" and "CM" is tolerated.
      if n =~ /\A\s*\(\s*CM\s*/i
        n = n.strip
        n = n[1..-2] if n[0..0] == "(" # drop the enclosing parentheses
        n = n.strip
        n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
        n = n.strip
        n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
        n = n[1..-2] if n[0..0] == '"'
      end
      n.strip
    end
  end

end
|
118
|
+
|
119
|
+
|
120
|
+
# Constructs the NexusFile: it is told about taxa, characters, codings,
# variables and notes one piece at a time and accumulates them in @nf, which
# is handed back by #nexus_file.
class Builder

  def initialize
    @nf = NexusFile.new
  end

  # Appends an empty taxon; returns the new number of taxa.
  def stub_taxon
    @nf.taxa.push(NexusFile::Taxon.new)
    @nf.taxa.size
  end

  # Appends an empty character; returns the new number of characters.
  def stub_chr
    @nf.characters.push(NexusFile::Character.new)
    @nf.characters.size
  end

  # Stores one matrix row.
  # taxon_index:: row index into codings (coerced with to_i).
  # rowvector::   one entry per character; an entry is a single state or an
  #               Array of states.
  def code_row(taxon_index, rowvector)
    row_index = taxon_index.to_i

    @nf.characters.each_with_index do |chr, i|
      @nf.codings[row_index] ||= []
      @nf.codings[row_index][i] = NexusFile::Coding.new(:states => rowvector[i])

      # !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
      cell = rowvector[i]
      observed = cell.is_a?(Array) ? cell.collect { |s| s.to_s } : [cell.to_s]
      observed.delete("?") # we don't add this to the db

      (observed - chr.state_labels).each do |label|
        chr.add_state(:label => label)
      end
    end
  end

  # Merges hash into the file's vars.
  # Raises RuntimeError if a key is already set to a different value.
  # Re-submitting an identical value is accepted.
  # NOTE(review): the original guard tested the literal key :k and so never
  # fired. Conflict-only is deliberately milder than rejecting any repeat,
  # because the parser (not visible here) may re-submit accumulated vars --
  # confirm, and tighten if a strict check is wanted.
  def add_var(hash)
    hash.each do |k, v|
      raise "var #{k} has already been set" if @nf.vars.key?(k) && @nf.vars[k] != v
    end
    @nf.vars.update(hash)
  end

  # Updates the taxon at options[:index] (coerced with to_i, as in #update_chr).
  # options:: :index is required; :name defaults to ''.
  # Returns false when :index is missing.
  # Raises NexusFile::ParseError when no taxon exists at that index.
  def update_taxon(options = {})
    opt = { :name => '' }.merge(options)
    return false unless opt[:index]

    index = opt[:index].to_i
    taxon = @nf.taxa[index]
    raise(::NexusFile::ParseError, "Can't update taxon of index #{index}, it doesn't exist!") unless taxon

    taxon.name = opt[:name] if opt[:name]
  end

  # Updates the character at options[:index] and its states.
  # Legal hash keys are :index, :name, and integers that point to state labels.
  # Returns false when :index is missing.
  # Raises NexusFile::ParseError when no character exists at that index.
  def update_chr(options = {})
    opt = { :name => '' }.merge(options)
    return false unless opt[:index]

    index = opt[:index].to_i
    chr = @nf.characters[index]

    # The leading :: matters: inside this namespace a bare NexusFile is the
    # inner class, which has no ParseError constant.
    raise(::NexusFile::ParseError, "Can't update character of index #{index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{opt[:name]}\".") unless chr

    chr.name = opt[:name] if opt[:name]

    opt.delete(:index)
    opt.delete(:name)

    # the rest have states
    opt.each do |k, state_name|
      # States are stored under string labels (see #code_row and the add_state
      # call below), so look the key up as given and in its string form.
      label = [k, k.to_s].find { |candidate| chr.states[candidate] }

      if label # state exists
        ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
        update_state(index, :index => label, :name => state_name)
      else # doesn't, create it
        chr.add_state(:label => k.to_s, :name => state_name)
      end
    end
  end

  # Renames an existing state.
  # options:: :index is the state label, :name the new name.
  def update_state(chr_index, options = {})
    # only handling name now
    @nf.characters[chr_index].states[options[:index]].name = options[:name]
    # add notes here
  end

  # Attaches a note to the file, a cell, a taxon or a character.
  # options:: :type is 'TEXT' (a footnote; uses :file, :taxon, :character) or
  #           'AN' (an annotation; uses :t, :c). Taxon/character numbers are
  #           1-based. :text defaults to ''. Other types are ignored.
  def add_note(options = {})
    opt = { :text => '' }.merge(options)

    case opt[:type]
    # Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
    when 'TEXT' # a footnote
      if opt[:file]
        @nf.notes << NexusFile::Note.new(opt)
      else
        attach_note(opt, opt[:taxon], opt[:character])
      end
    when 'AN' # an annotation, rather than a footnote, same dif
      attach_note(opt, opt[:t], opt[:c])
    end
  end

  # The NexusFile built so far.
  def nexus_file
    @nf
  end

  private

  # Files a Note built from opt under the cell (taxon and character given),
  # the taxon, or the character; does nothing when neither is given.
  def attach_note(opt, taxon, character)
    if taxon && character # its a cell
      coding = @nf.codings[taxon.to_i - 1][character.to_i - 1]
      coding.notes = [] if !coding.notes
      coding.notes << NexusFile::Note.new(opt)
    elsif taxon
      @nf.taxa[taxon.to_i - 1].notes << NexusFile::Note.new(opt)
    elsif character
      @nf.characters[character.to_i - 1].notes << NexusFile::Note.new(opt)
    end
  end

end # end file
|
258
|
+
|
259
|
+
# NexusFile::ParseError
# Raised when input cannot be lexed or parsed as a Nexus file.
class ParseError < StandardError; end
|
262
|
+
|
263
|
+
|
264
|
+
end # end module
|
265
|
+
|
266
|
+
|
267
|
+
# Parses Nexus-formatted text and returns the object produced by
# NexusFile::Builder#nexus_file.
#
# input:: the Nexus source as a String. It is not modified; frozen strings
#         are accepted.
#
# Raises NexusFile::ParseError when the text does not contain the "#Nexus",
# "Begin", "Matrix" and "end;" markers (checked case-insensitively).
def parse_nexus_file(input)
  # strip out all comments BEFORE we parse the file; gsub (not gsub!) so the
  # work happens on a copy of the caller's string
  source = input.gsub(/\[[^\]]*\]/, '')

  # quickly peek at the input, does this look like a Nexus file?
  looks_like_nexus = (source =~ /\#Nexus/i) && (source =~ /Begin/i) && (source =~ /Matrix/i) && (source =~ /end\;/i)
  unless looks_like_nexus
    raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
  end

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(source)
  NexusFile::Parser.new(lexer, builder).parse_file

  builder.nexus_file
end
|
282
|
+
|