nexus_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mjy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 [name of plugin creator]
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ NexusParser
2
+ ===========
3
+
4
+ Introduction goes here.
5
+
6
+
7
+ Example
8
+ =======
9
+
10
+ Example goes here.
11
+
12
+
13
+ Copyright (c) 2008 Matt Yoder, released under the MIT license
@@ -0,0 +1,17 @@
1
+ = nexus_parser
2
+
3
+ A full featured and extensible Nexus file (phylogenetic inference) parser in Ruby.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (If you want to have your own version, that is fine, but bump the version in a commit by itself so that I can ignore it when I pull.)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 mjy. See LICENSE for details.
@@ -0,0 +1,53 @@
1
# Rakefile for the nexus_parser gem: packaging (Jeweler), tests, coverage
# (RCov) and API documentation (RDoc) tasks.
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: without it the remaining tasks
# are still defined and usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "nexus_parser"
    gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
    gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
    gem.email = "diapriid@gmail.com"
    gem.homepage = "http://github.com/mjy/nexus_parser"
    gem.authors = ["mjy"]
    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When RCov is missing, :rcov is still defined but aborts
# with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only defined when Jeweler loaded above; depending on
# it unconditionally makes `rake test` fail with an unknown-task error.
task :test => :check_dependencies if Rake::Task.task_defined?("check_dependencies")

task :default => :test

# 'rake/rdoctask' was removed from modern Rake; prefer the task shipped with
# RDoc and fall back to the legacy location only on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so the title stays on one line
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "nexus_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ # Include hook code here
@@ -0,0 +1 @@
1
+ # Install hook code here
@@ -0,0 +1,66 @@
1
+
2
+
3
# Declared inside `module NexusFile` (instead of `class NexusFile::Lexer`) so
# that this file can be loaded on its own and so that ParseError and Tokens
# are resolved lexically within the module.
module NexusFile

  # Splits Nexus input into tokens on demand.
  #
  # Token classes are expected to respond to +regexp+ (a pattern matched
  # against the start of the remaining input, whose first capture group is the
  # token value) and to be constructible from that captured value.
  class Lexer

    # input:: the text to tokenize. The caller's string is left untouched
    #         (so frozen strings are accepted); the lexer works on a copy
    #         with DOS carriage returns removed.
    def initialize(input)
      @input = input.delete("\r") # get rid of possible DOS carriage returns
      @next_token = nil
    end

    # Checks whether the next token is of the specified class.
    # The token is read and buffered, but not consumed.
    def peek(token_class)
      read_next_token(token_class).class == token_class
    end

    # Returns (and removes) the next token from the input stream.
    # Raises ParseError if the next token is not of the given class.
    def pop(token_class)
      token = read_next_token(token_class)
      @next_token = nil # the buffered token is consumed even when it is rejected below
      unless token.class == token_class
        raise(ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
      end
      token
    end

    private

    # Reads (and buffers) the next token from the input, if it has not already
    # been read. Returns nil at end of input; raises ParseError when input
    # remains but no token class matches it.
    def read_next_token(token_class)
      return @next_token if @next_token

      # check for a match on the specified class first
      return @next_token if match(token_class)

      # now check all the tokens for a match
      Tokens.nexus_file_token_list.each do |candidate|
        return @next_token if match(candidate)
      end

      # no match, either end of string or lex-error
      return nil if @input == ''
      raise(ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
    end

    # Tries to match +token_class+ against the remaining input. On success the
    # token is buffered in @next_token, the matched text is dropped from the
    # input and true is returned; otherwise false.
    def match(token_class)
      found = token_class.regexp.match(@input)
      return false unless found

      @next_token = token_class.new(found[1])
      @input = @input[found.end(0)..-1]
      true
    end
  end

end
64
+
65
+
66
+
@@ -0,0 +1,282 @@
1
+ # NexusParser
2
+
3
+ # version 0.3 by Matt Yoder
4
+ # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
5
+ # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
6
+
7
+ # outstanding issues:
8
+ ## need to resolve Tokens Labels, ValuePair, IDs
9
+
10
+ module NexusFile
11
+
12
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
13
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
14
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
15
+
16
# In-memory representation of a parsed Nexus file.
#
# taxa::       Array of Taxon
# characters:: Array of Character
# sets::       Array (initialised empty)
# codings::    Array of rows (one per taxon), each an Array of Coding
# notes::      Array of Note
# vars::       Hash of variables collected while parsing
class NexusFile

  attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes

  def initialize
    @taxa = []
    @characters = []
    @sets = []
    @codings = []
    @notes = []
    @vars = {}
  end

  # A character (matrix column) with its states, keyed by state label.
  class Character
    attr_writer :name
    attr_accessor :states, :notes

    def initialize
      @name = nil
      @states = {}
      @notes = []
    end

    # Adds (or replaces) a state.
    #
    # options[:label]:: key under which the state is stored (required)
    # options[:name]::  name of the state, defaults to ''
    #
    # Returns false when no :label is given, otherwise the updated states hash.
    def add_state(options = {})
      opt = { :name => '' }.merge(options)
      return false unless opt[:label]

      @states.update(opt[:label] => ChrState.new(opt[:name]))
    end

    # Sorted list of the labels of all known states.
    def state_labels
      @states.keys.sort
    end

    # The character name, or "Undefined" when it is nil or empty.
    def name
      (@name.nil? || @name == "") ? "Undefined" : @name
    end
  end

  # A taxon (matrix row).
  class Taxon
    attr_accessor :name, :mesq_id, :notes

    def initialize
      @name = ""
      @mesq_id = ""
      @notes = []
    end
  end

  # A character state; its label is the key in Character#states.
  class ChrState
    attr_accessor :name, :notes

    def initialize(name)
      @name = name
    end
  end

  # A matrix cell. It exists as an object (rather than a bare value) so that
  # notes can be attached to it.
  class Coding
    attr_writer :states
    attr_accessor :notes

    def initialize(options = {})
      @states = options[:states]
      @notes = []
    end

    # The coded states, always as an Array (a single state is wrapped).
    def states
      @states.is_a?(Array) ? @states : [@states]
    end
  end

  # A footnote/annotation; +vars+ holds the raw options it was built from.
  class Note
    attr_accessor :vars

    def initialize(options = {})
      @vars = options
    end

    # The text of the note, taken from vars[:tf] or else vars[:text], with
    # surrounding whitespace removed. A placeholder message is returned when
    # neither is present. The stored strings are never modified.
    def note
      text = @vars[:tf] || @vars[:text] || 'No text recovered, possible parsing error.'

      # THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other
      # params in the future beyond CM, at that point move processing to the parser.
      # The whole string is tested (not just its first three characters), so
      # variants with whitespace such as "( CM 'note' )" are unwrapped as well.
      if text =~ /\A\s*\(\s*CM\s*/i
        text = text.strip
        text = text[1..-2] if text[0..0] == "("           # drop the enclosing parentheses
        text = text.strip
        text = text[2..-1] if text[0..1].downcase == "cm" # strip CM
        text = text.strip
        text = text[1..-2] if text[0..0] == "'"           # drop the enclosing quote marks
        text = text[1..-2] if text[0..0] == '"'
      end

      text.strip
    end
  end

end
118
+
119
+
120
# Constructs the NexusFile.
#
# The parser drives a Builder through the methods below; the finished model
# is available from #nexus_file. Within this class the constant +NexusFile+
# refers to the model class (NexusFile::NexusFile), which is why the parse
# error is referenced as plain +ParseError+.
class Builder

  def initialize
    @nf = NexusFile.new
  end

  # Appends an empty taxon; returns the new number of taxa.
  def stub_taxon
    @nf.taxa.push(NexusFile::Taxon.new)
    @nf.taxa.size
  end

  # Appends an empty character; returns the new number of characters.
  def stub_chr
    @nf.characters.push(NexusFile::Character.new)
    @nf.characters.size
  end

  # Stores one matrix row.
  #
  # taxon_index:: row index (converted with to_i)
  # rowvector::   one entry per character; an entry is either a single state
  #               or an Array of states
  def code_row(taxon_index, rowvector)
    row = taxon_index.to_i

    @nf.characters.each_with_index do |chr, i|
      @nf.codings[row] ||= []
      @nf.codings[row][i] = NexusFile::Coding.new(:states => rowvector[i])

      # !! we must update states for a given character if the state isn't found
      # (not all states are referenced in the description) !!
      coded = rowvector[i].is_a?(Array) ? rowvector[i] : [rowvector[i]]
      labels = coded.map { |s| s.to_s }
      labels.delete("?") # we don't add this to the db

      (labels - chr.state_labels).each do |label|
        chr.add_state(:label => label)
      end
    end
  end

  # Merges +hash+ into the file's vars.
  # Raises a RuntimeError if any of its keys has already been set.
  def add_var(hash)
    hash.each_key do |k|
      # look up the actual key; checking the literal symbol :k never detected duplicates
      raise "var #{k} has already been set" if @nf.vars.key?(k)
    end
    @nf.vars.update(hash)
  end

  # Sets the name of the taxon at options[:index] (name defaults to '').
  # Returns false when no :index is given.
  def update_taxon(options = {})
    opt = { :name => '' }.merge(options)
    return false unless opt[:index]

    (@nf.taxa[opt[:index]].name = opt[:name]) if opt[:name]
  end

  # Updates the name and the states of the character at options[:index].
  #
  # Legal hash keys are :index, :name, and integers that point to state
  # labels (the value being the state name). State labels are stored as
  # strings, so a key is matched against its string form unless the key
  # itself is already a label.
  #
  # Returns false when no :index is given; raises ParseError when the
  # character does not exist.
  def update_chr(options = {})
    opt = { :name => '' }.merge(options)
    return false unless opt[:index]

    index = opt.delete(:index).to_i
    name = opt.delete(:name)

    chr = @nf.characters[index]
    raise(ParseError, "Can't update character of index #{index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{name}\".") unless chr

    chr.name = name if name

    # the rest have states
    opt.keys.each do |k|
      label = chr.states.key?(k) ? k : k.to_s

      if chr.states[label] # state exists
        ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
        update_state(index, :index => label, :name => opt[k])
      else # doesn't, create it
        chr.add_state(:label => label, :name => opt[k])
      end
    end
  end

  # Renames the state options[:index] of character +chr_index+ to
  # options[:name]. Only the name is handled for now.
  def update_state(chr_index, options = {})
    @nf.characters[chr_index].states[options[:index]].name = options[:name]
  end

  # Attaches a note to the file, a taxon, a character or a matrix cell.
  #
  # options[:type] 'TEXT' (a footnote) is addressed with :file, :taxon and/or
  # :character; 'AN' (an annotation) is addressed with :t and/or :c. Taxon and
  # character numbers are 1-based. Any other type is ignored.
  def add_note(options = {})
    opt = { :text => '' }.merge(options)

    case opt[:type]
    # Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
    when 'TEXT' # a footnote
      if opt[:file]
        @nf.notes << NexusFile::Note.new(opt)
      else
        attach_note(opt[:taxon], opt[:character], opt)
      end
    when 'AN' # an annotation, rather than a footnote, same dif
      attach_note(opt[:t], opt[:c], opt)
    end
  end

  # The NexusFile built so far.
  def nexus_file
    @nf
  end

  private

  # Adds a Note built from +opt+ to the cell (taxon and character given), the
  # taxon or the character; does nothing when neither is given.
  def attach_note(taxon, character, opt)
    if taxon && character # it's a cell
      cell = @nf.codings[taxon.to_i - 1][character.to_i - 1]
      cell.notes ||= []
      cell.notes << NexusFile::Note.new(opt)
    elsif taxon
      @nf.taxa[taxon.to_i - 1].notes << NexusFile::Note.new(opt)
    elsif character
      @nf.characters[character.to_i - 1].notes << NexusFile::Note.new(opt)
    end
  end

end # end Builder
258
+
259
# NexusFile::ParseError
# Raised when the input cannot be lexed or parsed as a Nexus file.
class ParseError < StandardError; end
262
+
263
+
264
+ end # end module
265
+
266
+
267
# Parses the text of a Nexus file and returns the resulting model as
# produced by NexusFile::Builder#nexus_file.
#
# input:: the file contents as a String. It is not modified, so frozen
#         strings are accepted.
#
# Raises NexusFile::ParseError when the text does not look like a Nexus file.
def parse_nexus_file(input)
  # strip out all [comments] BEFORE we parse the file (on a copy of the input)
  text = input.gsub(/\[[^\]]*\]/, '')

  # quickly peek at the input, does this look like a Nexus file?
  required_markers = [/\#Nexus/i, /Begin/i, /Matrix/i, /end\;/i]
  unless required_markers.all? { |marker| text =~ marker }
    raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
  end

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(text)
  NexusFile::Parser.new(lexer, builder).parse_file

  builder.nexus_file
end
282
+