nexus_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/MIT-LICENSE +20 -0
- data/README +13 -0
- data/README.rdoc +17 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/install.rb +1 -0
- data/lib/lexer.rb +66 -0
- data/lib/nexus_file.rb +282 -0
- data/lib/parser.rb +334 -0
- data/lib/tokens.rb +269 -0
- data/tasks/nexus_parser_tasks.rake +4 -0
- data/test/MX_test_03.nex +234 -0
- data/test/test.nex +382 -0
- data/test/test_nexus_parser.rb +937 -0
- data/uninstall.rb +1 -0
- metadata +82 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 mjy
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 [name of plugin creator]
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= nexus_parser
|
2
|
+
|
3
|
+
A Nexus file format (phylogenetic inference) parser in Ruby.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 mjy. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "nexus_parser"
|
8
|
+
gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
|
9
|
+
gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
|
10
|
+
gem.email = "diapriid@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/mjy/nexus_parser"
|
12
|
+
gem.authors = ["mjy"]
|
13
|
+
# gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/test_*.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/test_*.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
+
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
50
|
+
rdoc.title = "nexus_parser #{version}"
|
51
|
+
rdoc.rdoc_files.include('README*')
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Include hook code here
|
data/install.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Install hook code here
|
data/lib/lexer.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Tokenizer for Nexus input. Tokens are consumed from the front of the
# input string, and at most one token is buffered between calls.
class NexusFile::Lexer

  # input - String holding the Nexus text to tokenize.
  def initialize(input)
    # Work on a private copy with DOS carriage returns removed, so the
    # caller's string is never modified and frozen strings are accepted.
    @input = input.gsub(/\x0D/, "")
    @next_token = nil
  end

  # Checks whether the next token is of the specified class.
  # The token stays buffered; nothing is removed from the stream.
  def peek(token_class)
    read_next_token(token_class).class == token_class
  end

  # Returns (and deletes) the next token from the input stream.
  # Raises NexusFile::ParseError if the next token is not of the given
  # class; the buffered token is discarded in that case as well.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil
    return token if token.class == token_class

    raise(NexusFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
  end

  private

  # Reads (and stores) the next token from the input, if it has not
  # already been read. Returns nil once the input is exhausted and raises
  # NexusFile::ParseError when the remaining input matches no token.
  def read_next_token(token_class)
    return @next_token if @next_token

    # check for a match on the specified class first
    return @next_token if match(token_class)

    # now check all the tokens for a match
    NexusFile::Tokens.nexus_file_token_list.each do |t|
      return @next_token if match(t)
    end

    # no match, either end of string or lex-error
    if @input != ''
      raise(NexusFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
    end

    nil
  end

  # Tries token_class.regexp against the remaining input. On success the
  # token (built from capture group 1) is buffered, everything up to the
  # end of the match is dropped from the input, and true is returned.
  def match(token_class)
    m = token_class.regexp.match(@input)
    return false unless m

    @next_token = token_class.new(m[1])
    @input = @input[m.end(0)..-1]
    true
  end
end
|
64
|
+
|
65
|
+
|
66
|
+
|
data/lib/nexus_file.rb
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
# NexusParser
|
2
|
+
|
3
|
+
# version 0.3 by Matt Yoder
|
4
|
+
# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
|
5
|
+
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
|
6
|
+
|
7
|
+
# outstanding issues:
|
8
|
+
## need to resolve Tokens Labels, ValuePair, IDs
|
9
|
+
|
10
|
+
module NexusFile
|
11
|
+
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
14
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
15
|
+
|
16
|
+
class NexusFile
|
17
|
+
|
18
|
+
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
19
|
+
|
20
|
+
def initialize
|
21
|
+
@taxa = []
|
22
|
+
@characters = []
|
23
|
+
@sets = []
|
24
|
+
@codings = []
|
25
|
+
@notes = []
|
26
|
+
@vars = {}
|
27
|
+
end
|
28
|
+
|
29
|
+
# A character: a name, its states keyed by state label, and a list of
# notes.
class Character
  # Only the writer is generated for name; the reader is defined below
  # because it substitutes a placeholder for a missing name.
  attr_writer :name
  attr_accessor :states, :notes

  def initialize
    @name = nil
    @states = {}
    @notes = []
  end

  # Adds (or replaces) the state stored under options[:label].
  #
  # options - :label (required) key the state is stored under
  #           :name  (optional, defaults to '') name given to the state
  #
  # Returns false when no :label is given, otherwise the states hash.
  def add_state(options = {})
    # A local is used rather than an instance variable so that scratch
    # data from one call does not linger on the object.
    opt = { :name => '' }.merge(options)
    return false if !opt[:label]

    @states.update(opt[:label] => ChrState.new(opt[:name]))
  end

  # Returns the state labels in sorted order.
  def state_labels
    @states.keys.sort
  end

  # Returns the character name, or "Undefined" when it is nil or empty.
  def name
    (@name.nil? || @name == "") ? "Undefined" : @name
  end
end
|
56
|
+
|
57
|
+
class Taxon
|
58
|
+
attr_accessor :name, :mesq_id, :notes
|
59
|
+
def initialize
|
60
|
+
@name = ""
|
61
|
+
@mesq_id = ""
|
62
|
+
@notes = []
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# A character state. The state's label is not stored on the object; it is
# the key under which the state is filed in Character#states.
class ChrState
  attr_accessor :name, :notes

  # name - the name given to this state.
  def initialize(name)
    @name = name
    # Start with an empty list, as Character and Taxon do, so callers can
    # append to notes without first checking for nil.
    @notes = []
  end
end
|
73
|
+
|
74
|
+
# The coding stored for one cell of the matrix: the state value(s) given
# at construction plus a list of notes. It exists as its own class so
# that notes can be attached to an individual cell.
class Coding
  # Only the writer is generated for states; the reader is defined below
  # because it always hands back an Array.
  attr_writer :states
  attr_accessor :notes

  # options - :states, a single state or an Array of states.
  def initialize(options = {})
    @states = options[:states]
    @notes = []
  end

  # Returns the states as an Array; a single stored value (including nil)
  # is wrapped in a one-element Array. Array subclasses are returned
  # as-is rather than being wrapped a second time.
  def states
    @states.is_a?(Array) ? @states : [@states]
  end
end
|
86
|
+
|
87
|
+
# A note. All attributes supplied for it are kept as-is in #vars; #note
# extracts the displayable text from them.
class Note
  attr_accessor :vars

  # options - Hash of note attributes; :tf and :text are the keys read by
  #           #note.
  def initialize(options = {})
    @vars = options
  end

  # Returns the note text with surrounding whitespace removed.
  #
  # :tf takes precedence over :text; when neither is present a fixed
  # "No text recovered" message is returned. The stored strings are never
  # modified.
  def note
    raw = @vars[:tf] || @vars[:text] || 'No text recovered, possible parsing error.'

    # THIS IS A HACK for handling the TF = (CM <note>) format, I assume there
    # will be other params in the future beyond CM, at that point move
    # processing to the parser.
    # The whole string is tested (not just its first three characters) so
    # that the optional whitespace allowed by the pattern can match.
    return raw.strip unless raw =~ /\A\s*\(\s*CM\s*/i

    text = raw.strip
    text = text[1..-2] if text[0..0] == "("           # drop the surrounding parentheses
    text = text.strip
    text = text[2..-1] if text[0..1].downcase == "cm" # strip CM
    text = text.strip
    text = text[1..-2] if text[0..0] == "'"           # drop surrounding quote marks
    text = text[1..-2] if text[0..0] == '"'
    text.strip
  end
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
|
120
|
+
# constructs the NexusFile
|
121
|
+
class Builder
|
122
|
+
|
123
|
+
def initialize
|
124
|
+
@nf = NexusFile.new
|
125
|
+
end
|
126
|
+
|
127
|
+
def stub_taxon
|
128
|
+
@nf.taxa.push(NexusFile::Taxon.new)
|
129
|
+
return @nf.taxa.size
|
130
|
+
end
|
131
|
+
|
132
|
+
def stub_chr
|
133
|
+
@nf.characters.push(NexusFile::Character.new)
|
134
|
+
return @nf.characters.size
|
135
|
+
end
|
136
|
+
|
137
|
+
def code_row(taxon_index, rowvector)
|
138
|
+
|
139
|
+
@nf.characters.each_with_index do |c, i|
|
140
|
+
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
141
|
+
@nf.codings[taxon_index.to_i][i] = NexusFile::Coding.new(:states => rowvector[i])
|
142
|
+
|
143
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in description) !!
|
144
|
+
|
145
|
+
existing_states = @nf.characters[i].state_labels
|
146
|
+
|
147
|
+
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
148
|
+
new_states.delete("?") # we don't add this to the db
|
149
|
+
new_states = new_states - existing_states
|
150
|
+
|
151
|
+
new_states.each do |s|
|
152
|
+
@nf.characters[i].add_state(:label => s)
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
# Merges hash into the file-level vars.
#
# hash - Hash of variable name => value.
#
# Raises RuntimeError if any key has already been set. All keys are
# checked before anything is stored, so a rejected call changes nothing.
def add_var(hash)
  hash.each_key do |k|
    # Look up the key itself; a literal :k symbol here would never match.
    raise "var #{k} has already been set" if @nf.vars.key?(k)
  end
  @nf.vars.update(hash)
end
|
164
|
+
|
165
|
+
# Sets the name of an existing taxon.
#
# options - :index (required) position of the taxon in the taxa list;
#                  converted with to_i, as update_chr does for characters
#           :name  (optional, defaults to '') the new name
#
# Returns false when no :index is given.
# Raises NexusFile::ParseError when there is no taxon at that index.
def update_taxon(options = {})
  opt = { :name => '' }.merge(options)
  return false if !opt[:index]

  index = opt[:index].to_i
  taxon = @nf.taxa[index]
  raise(NexusFile::ParseError, "Can't update taxon of index #{index}, it doesn't exist!") if !taxon

  taxon.name = opt[:name] if opt[:name]
end
|
174
|
+
|
175
|
+
# legal hash keys are :index, :name, and integers that point to state labels
|
176
|
+
# Updates the name and the state names of an existing character.
#
# options - legal keys are :index (required; converted with to_i), :name
#           (defaults to ''), and any further keys, each of which is a
#           state label (typically an integer) mapped to that state's name.
#
# Returns false when no :index is given.
# Raises NexusFile::ParseError when there is no character at that index.
def update_chr(options = {})
  opt = { :name => '' }.merge(options)
  return false if !opt[:index]

  index = opt[:index].to_i
  chr = @nf.characters[index]

  raise(NexusFile::ParseError, "Can't update character of index #{index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{opt[:name]}\".") if !chr

  chr.name = opt[:name] if opt[:name]

  opt.delete(:index)
  opt.delete(:name)

  # the rest have states
  opt.keys.each do |key|
    # States are stored under string labels, so the key is normalised
    # before the lookup; otherwise an integer key never finds the
    # existing state and it gets replaced instead of renamed.
    label = key.to_s

    if chr.states[label] # state exists
      ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
      update_state(index, :index => label, :name => opt[key])
    else # doesn't, create it
      chr.add_state(:label => label, :name => opt[key])
    end
  end
end
|
207
|
+
|
208
|
+
def update_state(chr_index, options = {})
|
209
|
+
# only handling name now
|
210
|
+
#options.keys.each do |k|
|
211
|
+
@nf.characters[chr_index].states[options[:index]].name = options[:name]
|
212
|
+
# add notes here
|
213
|
+
# end
|
214
|
+
end
|
215
|
+
|
216
|
+
def add_note(options = {})
|
217
|
+
@opt = {
|
218
|
+
:text => ''
|
219
|
+
}.merge!(options)
|
220
|
+
|
221
|
+
case @opt[:type]
|
222
|
+
|
223
|
+
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
224
|
+
when 'TEXT' # a footnote
|
225
|
+
if @opt[:file]
|
226
|
+
@nf.notes << NexusFile::Note.new(@opt)
|
227
|
+
|
228
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
229
|
+
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
230
|
+
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusFile::Note.new(@opt)
|
231
|
+
|
232
|
+
elsif @opt[:taxon] && !@opt[:character]
|
233
|
+
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusFile::Note.new(@opt)
|
234
|
+
|
235
|
+
elsif @opt[:character] && !@opt[:taxon]
|
236
|
+
|
237
|
+
@nf.characters[@opt[:character].to_i - 1].notes << NexusFile::Note.new(@opt)
|
238
|
+
end
|
239
|
+
|
240
|
+
when 'AN' # an annotation, rather than a footnote, same dif
|
241
|
+
if @opt[:t] && @opt[:c]
|
242
|
+
@nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes = [] if !@nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes
|
243
|
+
@nf.codings[@opt[:t].to_i - 1][@opt[:c].to_i - 1].notes << NexusFile::Note.new(@opt)
|
244
|
+
elsif @opt[:t]
|
245
|
+
@nf.taxa[@opt[:t].to_i - 1].notes << NexusFile::Note.new(@opt)
|
246
|
+
elsif @opt[:c]
|
247
|
+
@nf.characters[@opt[:c].to_i - 1].notes << NexusFile::Note.new(@opt)
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
end
|
252
|
+
|
253
|
+
def nexus_file
|
254
|
+
@nf
|
255
|
+
end
|
256
|
+
|
257
|
+
end # end file
|
258
|
+
|
259
|
+
# NexusFile::ParseError
|
260
|
+
class ParseError < StandardError
|
261
|
+
end
|
262
|
+
|
263
|
+
|
264
|
+
end # end module
|
265
|
+
|
266
|
+
|
267
|
+
# Parses Nexus-formatted text and returns the object produced by
# NexusFile::Builder#nexus_file.
#
# input - String with the contents of a Nexus file. It is not modified,
#         so frozen strings are accepted.
#
# Raises NexusFile::ParseError when the text lacks any of the markers
# looked for below; the lexer and parser may raise it as well.
def parse_nexus_file(input)
  # Strip out all [bracketed comments] BEFORE we parse the file. This is
  # done on a copy so the caller's string is left untouched.
  text = input.gsub(/\[[^\]]*\]/, '')

  # quickly peek at the input, does this look like a Nexus file?
  required = [/\#Nexus/i, /Begin/i, /Matrix/i, /end\;/i]
  unless required.all? { |marker| text =~ marker }
    raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
  end

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(text)
  NexusFile::Parser.new(lexer, builder).parse_file

  builder.nexus_file
end
|
282
|
+
|