nexus_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 mjy
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 [name of plugin creator]
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,13 @@
1
+ NexusParser
2
+ ===========
3
+
4
+ Introduction goes here.
5
+
6
+
7
+ Example
8
+ =======
9
+
10
+ Example goes here.
11
+
12
+
13
+ Copyright (c) 2008 Matt Yoder, released under the MIT license
@@ -0,0 +1,17 @@
1
+ = nexus_parser
2
+
3
+ Description goes here.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2010 mjy. See LICENSE for details.
@@ -0,0 +1,53 @@
1
# Rakefile for the nexus_parser gem: packaging (Jeweler), unit tests,
# coverage (RCov) and RDoc generation.
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded a hint is
# printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "nexus_parser"
    gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
    gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
    gem.email = "diapriid@gmail.com"
    gem.homepage = "http://github.com/mjy/nexus_parser"
    gem.authors = ["mjy"]
    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage. When RCov is missing, the :rcov task only explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only defined when the Jeweler tasks above were
# created; depending on it unconditionally makes `rake test` fail with an
# unknown-task error whenever Jeweler is not installed.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# RDoc generation. 'rake/rdoctask' is missing from newer Rake releases, so use
# the task class shipped with RDoc when it is available and fall back to the
# old Rake one otherwise.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "nexus_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ # Include hook code here
@@ -0,0 +1 @@
1
+ # Install hook code here
@@ -0,0 +1,66 @@
1
+
2
+
3
# Tokenizer for Nexus input.
#
# Keeps the not-yet-consumed remainder of the input in @input and at most one
# already-read token in @next_token. Token classes are expected to respond to
# +regexp+ and to be constructible from the first capture group of a match.
class NexusFile::Lexer

  # input - the String to tokenize.
  #
  # Carriage returns are removed from a copy of the input, so the caller's
  # string is left untouched (and may be frozen).
  def initialize(input)
    @input = input.gsub(/\x0D/, "") # get rid of possible DOS carriage returns
    @next_token = nil
  end

  # Checks whether the next token is of the specified class. The token that
  # was read stays buffered until the next call to pop.
  def peek(token_class)
    read_next_token(token_class).class == token_class
  end

  # Returns the next token from the input stream and drops it from the buffer.
  # Raises NexusFile::ParseError if that token is not of the given class; the
  # buffered token is discarded in that case too.
  def pop(token_class)
    token = read_next_token(token_class)
    @next_token = nil
    if token.class != token_class
      raise(NexusFile::ParseError, "expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
    end
    token
  end

  private

  # Reads (and buffers) the next token from the input, unless one is already
  # buffered. The requested class is tried first, then every class in
  # NexusFile::Tokens.nexus_file_token_list in order.
  #
  # Returns nil when the input is exhausted; raises NexusFile::ParseError when
  # input remains but no token class matches it.
  def read_next_token(token_class)
    return @next_token if @next_token

    # check for a match on the specified class first
    return @next_token if match(token_class)

    # now check all the tokens for a match
    NexusFile::Tokens.nexus_file_token_list.each do |t|
      return @next_token if match(t)
    end

    # no match, either end of string or lex-error
    if @input != ''
      raise(NexusFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
    end
    nil
  end

  # Tries token_class.regexp against the remaining input. On success the token
  # (built from capture group 1) is buffered, everything up to the end of the
  # match is dropped from the input, and true is returned; otherwise false.
  def match(token_class)
    m = token_class.regexp.match(@input)
    return false if !m

    @next_token = token_class.new(m[1])
    @input = @input[m.end(0)..-1]
    true
  end
end
64
+
65
+
66
+
@@ -0,0 +1,282 @@
1
+ # NexusParser
2
+
3
+ # version 0.3 by Matt Yoder
4
+ # uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
5
+ # Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
6
+
7
+ # outstanding issues:
8
+ ## need to resolve Tokens Labels, ValuePair, IDs
9
+
10
+ module NexusFile
11
+
12
+ require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
13
+ require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
14
+ require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
15
+
16
# In-memory result of parsing a Nexus file: taxa, characters, the coding
# matrix, file-level notes and the variables collected along the way.
class NexusFile

  attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes

  def initialize
    @taxa = []
    @characters = []
    @sets = []
    @codings = []
    @notes = []
    @vars = {}
  end

  # A character (matrix column) with its states keyed by state label.
  class Character
    attr_writer :name
    attr_accessor :states, :notes

    def initialize
      @name = nil
      @states = {}
      @notes = []
    end

    # Adds (or replaces) the state stored under options[:label].
    #
    # options - :label (required) and :name (defaults to '').
    #
    # Returns false when no :label is given, otherwise the states hash.
    def add_state(options = {})
      opt = { :name => '' }.merge(options)
      return false if !opt[:label]

      @states.update(opt[:label] => ChrState.new(opt[:name]))
    end

    # Sorted list of the state labels known for this character.
    def state_labels
      @states.keys.sort
    end

    # The character name, or "Undefined" when none (or an empty one) was set.
    def name
      (@name.nil? || @name == "") ? "Undefined" : @name
    end
  end

  # A taxon (matrix row).
  class Taxon
    attr_accessor :name, :mesq_id, :notes

    def initialize
      @name = ""
      @mesq_id = ""
      @notes = []
    end
  end

  # A single character state; its label is the key in Character#states.
  class ChrState
    attr_accessor :name, :notes

    def initialize(name)
      @name = name
      @notes = [] # start empty, like the notes of every sibling class
    end
  end

  # One cell of the matrix. Wrapped in a class (rather than a bare value) so
  # that notes can be attached to it.
  class Coding
    attr_writer :states
    attr_accessor :notes

    def initialize(options = {})
      @states = options[:states]
      @notes = []
    end

    # The coded state(s), always as an Array.
    def states
      @states.is_a?(Array) ? @states : [@states]
    end
  end

  # A footnote/annotation; vars holds the raw options it was created with.
  class Note
    attr_accessor :vars

    def initialize(options = {})
      @vars = options
    end

    # The note text: vars[:tf] if present, else vars[:text], else a
    # placeholder message. The stored values are never modified.
    def note
      n = @vars[:tf] || @vars[:text] || 'No text recovered, possible parsing error.'

      # THIS IS A HACK for handling the TF = (CM <note>) format, I assume there
      # will be other params in the future beyond CM, at that point move
      # processing to the parser.
      # The whole string is tested (not just its first three characters) so
      # that leading whitespace and "( CM" are recognised as well.
      if n =~ /\A\s*\(\s*CM\s*/i
        n = n.strip
        n = n[1..-2] if n[0..0] == "("           # drop the surrounding parentheses
        n = n.strip
        n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
        n = n.strip
        n = n[1..-2] if n[0..0] == "'"           # get rid of quote marks
        n = n[1..-2] if n[0..0] == '"'
      end
      n.strip
    end
  end

end
118
+
119
+
120
# Constructs the NexusFile: the parser reports what it reads through these
# methods and fetches the finished object with #nexus_file.
class Builder

  def initialize
    @nf = NexusFile.new
  end

  # Appends an empty taxon. Returns the number of taxa afterwards.
  def stub_taxon
    @nf.taxa.push(NexusFile::Taxon.new)
    @nf.taxa.size
  end

  # Appends an empty character. Returns the number of characters afterwards.
  def stub_chr
    @nf.characters.push(NexusFile::Character.new)
    @nf.characters.size
  end

  # Stores one matrix row.
  #
  # taxon_index - row index (converted with to_i).
  # rowvector   - one entry per character; an entry is a single state or an
  #               Array of states.
  def code_row(taxon_index, rowvector)
    row = taxon_index.to_i

    @nf.characters.each_with_index do |chr, i|
      @nf.codings[row] ||= []
      @nf.codings[row][i] = NexusFile::Coding.new(:states => rowvector[i])

      # !! we must update states for a given character if the state isn't
      # found (not all states are referenced in the description) !!
      coded = rowvector[i].is_a?(Array) ? rowvector[i].collect { |s| s.to_s } : [rowvector[i].to_s]
      coded.delete("?") # we don't add this to the db

      (coded - chr.state_labels).each do |label|
        chr.add_state(:label => label)
      end
    end
  end

  # Merges hash into the file's vars.
  #
  # Raises RuntimeError if any key of hash has already been set.
  def add_var(hash)
    hash.keys.each do |k|
      # look up the actual key (the literal symbol :k never matched anything)
      raise "var #{k} has already been set" if @nf.vars.key?(k)
    end
    @nf.vars.update(hash)
  end

  # Sets the name of the taxon at options[:index].
  #
  # Returns false when no :index is given.
  def update_taxon(options = {})
    opt = { :name => '' }.merge(options)
    return false if !opt[:index]

    # to_i mirrors update_chr, so a numeric string index works here as well
    (@nf.taxa[opt[:index].to_i].name = opt[:name]) if opt[:name]
  end

  # Updates the character at options[:index].
  #
  # legal hash keys are :index, :name, and integers that point to state labels
  #
  # Returns false when no :index is given. Raises ParseError when no character
  # exists at that index.
  def update_chr(options = {})
    opt = { :name => '' }.merge(options)
    return false if !opt[:index]

    index = opt[:index].to_i
    chr = @nf.characters[index]

    # ParseError is referenced unqualified on purpose: inside module NexusFile
    # the name NexusFile resolves to the nested class, which has no ParseError.
    raise(ParseError, "Can't update character of index #{index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{opt[:name]}\".") if !chr

    chr.name = opt[:name] if opt[:name]

    opt.delete(:index)
    opt.delete(:name)

    # the rest have states
    opt.keys.each do |k|
      if chr.states[k] # state exists
        ## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
        update_state(index, :index => k, :name => opt[k])
      else # doesn't, create it
        chr.add_state(:label => k.to_s, :name => opt[k])
      end
    end
  end

  # Renames the state stored under options[:index] of character chr_index.
  # Only the name is handled for now.
  def update_state(chr_index, options = {})
    @nf.characters[chr_index].states[options[:index]].name = options[:name]
  end

  # Attaches a note to the file, a taxon, a character or a single cell.
  #
  # options[:type] selects the flavour:
  #   'TEXT' - a footnote, addressed with :file, or :taxon and/or :character
  #   'AN'   - an annotation, addressed with :t and/or :c
  # Taxon and character numbers are 1-based. Any other type is ignored.
  def add_note(options = {})
    opt = { :text => '' }.merge(options)

    case opt[:type]
    # Why does mesquite differentiate b/w footnotes and annotations?!,
    # apparently same data structure?
    when 'TEXT' # a footnote
      if opt[:file]
        @nf.notes << NexusFile::Note.new(opt)
      else
        attach_note(opt[:taxon], opt[:character], opt)
      end
    when 'AN' # an annotation, rather than a footnote, same dif
      attach_note(opt[:t], opt[:c], opt)
    end
  end

  # The NexusFile built so far.
  def nexus_file
    @nf
  end

  private

  # Adds a Note built from opt to the cell (taxon and character given), the
  # taxon or the character; does nothing when neither is given.
  def attach_note(taxon, character, opt)
    if taxon && character # it's a cell
      coding = @nf.codings[taxon.to_i - 1][character.to_i - 1]
      coding.notes = [] if !coding.notes
      coding.notes << NexusFile::Note.new(opt)
    elsif taxon
      @nf.taxa[taxon.to_i - 1].notes << NexusFile::Note.new(opt)
    elsif character
      @nf.characters[character.to_i - 1].notes << NexusFile::Note.new(opt)
    end
  end

end # end Builder
258
+
259
# NexusFile::ParseError - the error type raised for input that cannot be
# tokenized or does not pass the Nexus header check.
class ParseError < StandardError; end
262
+
263
+
264
+ end # end module
265
+
266
+
267
# Parses the text of a Nexus file.
#
# input - String with the file contents. It is not modified: comments are
#         stripped from a copy.
#
# Returns the object produced by NexusFile::Builder#nexus_file.
# Raises NexusFile::ParseError when the text does not contain the "#Nexus",
# "Begin", "Matrix" and "end;" markers (case-insensitive).
def parse_nexus_file(input)
  # strip out all comments BEFORE we parse the file
  text = input.gsub(/\[[^\]]*\]/, '')

  # quickly peek at the input, does this look like a Nexus file?
  required_markers = [/\#Nexus/i, /Begin/i, /Matrix/i, /end\;/i]
  if !required_markers.all? { |marker| text =~ marker }
    raise(NexusFile::ParseError, "File is missing at least some required headers, check formatting.", caller)
  end

  builder = NexusFile::Builder.new
  lexer = NexusFile::Lexer.new(text)
  NexusFile::Parser.new(lexer, builder).parse_file

  builder.nexus_file
end
282
+