nexus_parser 1.1.4 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/LICENSE +25 -17
- data/{README.rdoc → README.md} +7 -5
- data/Rakefile +5 -21
- data/lib/{lexer.rb → nexus_parser/lexer.rb} +17 -17
- data/lib/{parser.rb → nexus_parser/parser.rb} +2 -2
- data/lib/{tokens.rb → nexus_parser/tokens.rb} +36 -36
- data/lib/nexus_parser/version.rb +5 -0
- data/lib/nexus_parser.rb +44 -39
- data/nexus_parser.gemspec +49 -50
- data/test/MX_test_03.nex +3 -3
- data/test/test_nexus_parser.rb +134 -138
- metadata +111 -63
- data/MIT-LICENSE +0 -20
- data/README +0 -13
- data/VERSION +0 -1
- data/init.rb +0 -1
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 93e9b7ced7b53e19e2cd33c72c433736058c4adcf7555dc6b43635c08f5dcea7
|
4
|
+
data.tar.gz: 4b9bce06037be960e29e1e4f02bbeef286f47a35e977384a5d451e1f0c3e3f91
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2297128f4ad470e7de1760996aaa712ed8bc691fee0a3cc8a59de54cf1b40bcc9d1cd6478049ae14f93b90ea208a93b6bc4872b76ecf8fc99221ea1b583f954d
|
7
|
+
data.tar.gz: 19615c9fcdc2469bf32e681ac3b81b1a4ca8f468a0154671573b73c0e54f247d78b075ee3a2a1d63a4ba21c0cd657dfe656f8140b8feed07ca54d93fadf1772a
|
data/.gitignore
CHANGED
data/LICENSE
CHANGED
@@ -1,20 +1,28 @@
|
|
1
|
-
Copyright (c)
|
1
|
+
Copyright (c) 2008- Matt Yoder. All rights reserved.
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
-
permit persons to whom the Software is furnished to do so, subject to
|
9
|
-
the following conditions:
|
3
|
+
Developed by: Matt Yoder, Species File Group, and Collaborators
|
4
|
+
University of Illinois
|
5
|
+
https://speciesfilegroup.org
|
10
6
|
|
11
|
-
|
12
|
-
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
8
|
+
this software and associated documentation files (the "Software"), to deal with
|
9
|
+
the Software without restriction, including without limitation the rights
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
11
|
+
of the Software, and to permit persons to whom the Software is furnished to
|
12
|
+
do so, subject to the following conditions:
|
13
|
+
* Redistributions of source code must retain the above copyright notice,
|
14
|
+
this list of conditions and the following disclaimers.
|
15
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
16
|
+
this list of conditions and the following disclaimers in the documentation
|
17
|
+
and/or other materials provided with the distribution.
|
18
|
+
* Neither the names of <NAME OF DEVELOPMENT GROUP>, <NAME OF INSTITUTION>,
|
19
|
+
nor the names of its contributors may be used to endorse or promote products
|
20
|
+
derived from this Software without specific prior written permission.
|
13
21
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
OF
|
20
|
-
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
25
|
+
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
28
|
+
SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,8 +1,10 @@
|
|
1
|
-
|
1
|
+
# nexus_parser
|
2
|
+
|
3
|
+
A Ruby lexer/parser for [nexus](https://en.wikipedia.org/wiki/Nexus_file) files, as used in phylogenetic analysis in taxonomy.
|
2
4
|
|
3
5
|
See the test files for usage for now, lots of examples there.
|
4
6
|
|
5
|
-
|
7
|
+
## Note on Patches/Pull Requests
|
6
8
|
|
7
9
|
* Fork the project.
|
8
10
|
* Make your feature addition or bug fix.
|
@@ -10,8 +12,8 @@ See the test files for usage for now, lots of examples there.
|
|
10
12
|
future version unintentionally.
|
11
13
|
* Commit, do not mess with rakefile, version, or history.
|
12
14
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
-
*
|
15
|
+
* Make a pull request.
|
14
16
|
|
15
|
-
|
17
|
+
## License
|
16
18
|
|
17
|
-
|
19
|
+
`nexus_parser` is open source and is now available under the [University of Illinois/NCSA Open Source License](https://en.wikipedia.org/wiki/University_of_Illinois/NCSA_Open_Source_License).
|
data/Rakefile
CHANGED
@@ -1,23 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
1
4
|
require 'rubygems'
|
2
5
|
require 'rake'
|
3
6
|
|
4
|
-
begin
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "nexus_parser"
|
8
|
-
gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
|
9
|
-
gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
|
10
|
-
gem.email = "diapriid@gmail.com"
|
11
|
-
gem.homepage = "http://github.com/mjy/nexus_parser"
|
12
|
-
gem.authors = ["mjy"]
|
13
|
-
# gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
end
|
16
|
-
Jeweler::GemcutterTasks.new
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
7
|
require 'rake/testtask'
|
22
8
|
Rake::TestTask.new(:test) do |test|
|
23
9
|
test.libs << 'lib' << 'test'
|
@@ -38,13 +24,11 @@ rescue LoadError
|
|
38
24
|
end
|
39
25
|
end
|
40
26
|
|
41
|
-
task :test => :check_dependencies
|
42
|
-
|
43
27
|
task :default => :test
|
44
28
|
|
45
|
-
require '
|
29
|
+
require 'rdoc/task'
|
46
30
|
Rake::RDocTask.new do |rdoc|
|
47
|
-
version =
|
31
|
+
version = NexusParser::VERSION
|
48
32
|
|
49
33
|
rdoc.rdoc_dir = 'rdoc'
|
50
34
|
rdoc.title = "nexus_parser #{version}"
|
@@ -8,8 +8,8 @@ class NexusParser::Lexer
|
|
8
8
|
@input.gsub!(/\x0D/,"") # get rid of possible DOS carriage returns
|
9
9
|
@next_token = nil
|
10
10
|
end
|
11
|
-
|
12
|
-
# checks whether the next token is of the specified class.
|
11
|
+
|
12
|
+
# checks whether the next token is of the specified class.
|
13
13
|
def peek(token_class)
|
14
14
|
token = read_next_token(token_class)
|
15
15
|
return token.class == token_class
|
@@ -21,18 +21,18 @@ class NexusParser::Lexer
|
|
21
21
|
token = read_next_token(token_class)
|
22
22
|
@next_token = nil
|
23
23
|
if token.class != token_class
|
24
|
-
|
24
|
+
raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
|
25
25
|
else
|
26
|
-
|
26
|
+
return token
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
private
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
# read (and store) the next token from the input, if it has not already been read.
|
32
|
+
def read_next_token(token_class)
|
33
|
+
if @next_token
|
34
|
+
return @next_token
|
35
|
+
else
|
36
36
|
# check for a match on the specified class first
|
37
37
|
if match(token_class)
|
38
38
|
return @next_token
|
@@ -42,19 +42,19 @@ class NexusParser::Lexer
|
|
42
42
|
return @next_token if match(t)
|
43
43
|
}
|
44
44
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
# no match, either end of string or lex-error
|
46
|
+
if @input != ''
|
47
|
+
raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
48
|
+
else
|
49
|
+
return nil
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
def match(token_class)
|
55
55
|
if (m = token_class.regexp.match(@input))
|
56
56
|
@next_token = token_class.new(m[1])
|
57
|
-
|
57
|
+
@input = @input[m.end(0)..-1]
|
58
58
|
return true
|
59
59
|
else
|
60
60
|
return false
|
@@ -8,7 +8,7 @@ class NexusParser::Parser
|
|
8
8
|
|
9
9
|
def parse_file
|
10
10
|
# nf = @builder.new_nexus_file # create new local NexusParser instance, nf
|
11
|
-
blks = []
|
11
|
+
# blks = []
|
12
12
|
@lexer.pop(NexusParser::Tokens::NexusStart)
|
13
13
|
|
14
14
|
while @lexer.peek(NexusParser::Tokens::BeginBlk)
|
@@ -215,7 +215,7 @@ class NexusParser::Parser
|
|
215
215
|
|
216
216
|
opts.update({:index => (index - 1), :name => name})
|
217
217
|
|
218
|
-
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
|
218
|
+
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
|
219
219
|
@builder.update_chr(opts)
|
220
220
|
end
|
221
221
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module NexusParser::Tokens
|
2
2
|
|
3
|
-
class Token
|
3
|
+
class Token
|
4
4
|
# this allows access to the class attribute regexp, without using a class variable
|
5
5
|
class << self; attr_reader :regexp; end
|
6
|
-
attr_reader :value
|
6
|
+
attr_reader :value
|
7
7
|
def initialize(str)
|
8
8
|
@value = str
|
9
9
|
end
|
@@ -11,7 +11,7 @@ module NexusParser::Tokens
|
|
11
11
|
|
12
12
|
# in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
|
13
13
|
# moving along popping off
|
14
|
-
|
14
|
+
|
15
15
|
class NexusStart < Token
|
16
16
|
@regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
|
17
17
|
end
|
@@ -21,7 +21,7 @@ module NexusParser::Tokens
|
|
21
21
|
# @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
|
22
22
|
# def initialize(str)
|
23
23
|
# str = str[1..-2] # strip the []
|
24
|
-
# str.strip!
|
24
|
+
# str.strip!
|
25
25
|
# @value = str
|
26
26
|
# end
|
27
27
|
# end
|
@@ -31,20 +31,20 @@ module NexusParser::Tokens
|
|
31
31
|
end
|
32
32
|
|
33
33
|
class EndBlk < Token
|
34
|
-
@regexp = Regexp.new(/\A\s*([\s
|
34
|
+
@regexp = Regexp.new(/\A\s*([\s]*End[\s]*;[\s]*)/i)
|
35
35
|
end
|
36
36
|
|
37
|
-
# label
|
37
|
+
# label
|
38
38
|
class AuthorsBlk < Token
|
39
|
-
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
39
|
+
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
40
40
|
end
|
41
41
|
|
42
|
-
# label
|
42
|
+
# label
|
43
43
|
class TaxaBlk < Token
|
44
44
|
@regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
|
45
45
|
end
|
46
46
|
|
47
|
-
# label
|
47
|
+
# label
|
48
48
|
class NotesBlk < Token
|
49
49
|
@regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
|
50
50
|
end
|
@@ -66,9 +66,9 @@ module NexusParser::Tokens
|
|
66
66
|
@regexp = Regexp.new(/\A\s*(format)\s*/i)
|
67
67
|
end
|
68
68
|
|
69
|
-
# label
|
69
|
+
# label
|
70
70
|
class Taxlabels < Token
|
71
|
-
|
71
|
+
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
72
72
|
end
|
73
73
|
|
74
74
|
# same as ID
|
@@ -77,8 +77,8 @@ module NexusParser::Tokens
|
|
77
77
|
def initialize(str)
|
78
78
|
str.strip!
|
79
79
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
80
|
-
str = str[1..-2] if str[0..0] == '"'
|
81
|
-
str.strip!
|
80
|
+
str = str[1..-2] if str[0..0] == '"'
|
81
|
+
str.strip!
|
82
82
|
@value = str
|
83
83
|
end
|
84
84
|
end
|
@@ -91,15 +91,15 @@ module NexusParser::Tokens
|
|
91
91
|
@regexp = Regexp.new(/\A\s*(link.*\s*;)\s*\n*/i)
|
92
92
|
end
|
93
93
|
|
94
|
-
# note we grab EOL and ; here
|
94
|
+
# note we grab EOL and ; here
|
95
95
|
class ValuePair < Token
|
96
|
-
@regexp = Regexp.new(/\A\s*([\w
|
96
|
+
@regexp = Regexp.new(/\A\s*([\w]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s;]+)))[\s;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
97
97
|
def initialize(str)
|
98
98
|
str.strip!
|
99
99
|
str = str.split(/=/)
|
100
100
|
str[1].strip!
|
101
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
102
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
101
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
102
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
103
103
|
@value = {str[0].strip.downcase.to_sym => str[1].strip}
|
104
104
|
end
|
105
105
|
end
|
@@ -110,10 +110,10 @@ module NexusParser::Tokens
|
|
110
110
|
|
111
111
|
class RowVec < Token
|
112
112
|
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
113
|
+
def initialize(str)
|
114
|
+
# meh! Ruby is simpler to read than Perl?
|
115
|
+
# handles both () and {} style multistates
|
116
|
+
s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
|
117
117
|
@value = s
|
118
118
|
end
|
119
119
|
end
|
@@ -131,37 +131,37 @@ module NexusParser::Tokens
|
|
131
131
|
end
|
132
132
|
|
133
133
|
# unparsed blocks
|
134
|
-
|
134
|
+
|
135
135
|
class TreesBlk < Token
|
136
136
|
@regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
|
137
137
|
end
|
138
138
|
|
139
139
|
class SetsBlk < Token
|
140
|
-
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
140
|
+
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
141
141
|
end
|
142
142
|
|
143
143
|
class MqCharModelsBlk < Token
|
144
|
-
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
144
|
+
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
145
145
|
end
|
146
146
|
|
147
147
|
class LabelsBlk < Token
|
148
|
-
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
148
|
+
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
149
149
|
end
|
150
150
|
|
151
151
|
class AssumptionsBlk < Token
|
152
|
-
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
152
|
+
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
153
153
|
end
|
154
154
|
|
155
155
|
class CodonsBlk < Token
|
156
|
-
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
156
|
+
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
157
157
|
end
|
158
158
|
|
159
159
|
class MesquiteBlk < Token
|
160
|
-
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
160
|
+
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
161
161
|
end
|
162
162
|
|
163
163
|
class BlkEnd < Token
|
164
|
-
@regexp = Regexp.new(/\A[\s
|
164
|
+
@regexp = Regexp.new(/\A[\s]*(END;)\s*/i)
|
165
165
|
end
|
166
166
|
|
167
167
|
class LBracket < Token
|
@@ -173,13 +173,13 @@ module NexusParser::Tokens
|
|
173
173
|
end
|
174
174
|
|
175
175
|
class LParen < Token
|
176
|
-
|
176
|
+
@regexp = Regexp.new('\A\s*(\()\s*')
|
177
177
|
end
|
178
178
|
|
179
179
|
class RParen < Token
|
180
180
|
@regexp = Regexp.new('\A\s*(\))\s*')
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
class Equals < Token
|
184
184
|
@regexp = Regexp.new('\A\s*(=)\s*')
|
185
185
|
end
|
@@ -192,7 +192,7 @@ module NexusParser::Tokens
|
|
192
192
|
class ID < Token
|
193
193
|
@regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
|
194
194
|
def initialize(str)
|
195
|
-
str.strip!
|
195
|
+
str.strip!
|
196
196
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
197
197
|
@value = str
|
198
198
|
end
|
@@ -241,7 +241,7 @@ module NexusParser::Tokens
|
|
241
241
|
NexusParser::Tokens::LabelsBlk,
|
242
242
|
NexusParser::Tokens::TaxaBlk,
|
243
243
|
NexusParser::Tokens::NotesBlk,
|
244
|
-
NexusParser::Tokens::Title,
|
244
|
+
NexusParser::Tokens::Title,
|
245
245
|
NexusParser::Tokens::Taxlabels,
|
246
246
|
NexusParser::Tokens::Dimensions,
|
247
247
|
NexusParser::Tokens::FileLbl,
|
@@ -263,12 +263,12 @@ module NexusParser::Tokens
|
|
263
263
|
NexusParser::Tokens::RParen,
|
264
264
|
NexusParser::Tokens::LBracket,
|
265
265
|
NexusParser::Tokens::RBracket,
|
266
|
-
NexusParser::Tokens::Label, # must be before RowVec
|
266
|
+
NexusParser::Tokens::Label, # must be before RowVec
|
267
267
|
NexusParser::Tokens::RowVec,
|
268
268
|
NexusParser::Tokens::LinkLine,
|
269
269
|
NexusParser::Tokens::ID # need to trash this
|
270
|
-
]
|
270
|
+
]
|
271
271
|
end
|
272
|
-
|
272
|
+
|
273
273
|
end
|
274
274
|
|
data/lib/nexus_parser.rb
CHANGED
@@ -8,12 +8,12 @@
|
|
8
8
|
|
9
9
|
module NexusParser
|
10
10
|
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
13
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
14
|
-
|
15
|
-
class NexusParser
|
16
|
-
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'parser'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'lexer'))
|
14
|
+
|
15
|
+
class NexusParser
|
16
|
+
|
17
17
|
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
18
18
|
|
19
19
|
def initialize
|
@@ -26,7 +26,9 @@ class NexusParser
|
|
26
26
|
end
|
27
27
|
|
28
28
|
class Character
|
29
|
-
attr_accessor
|
29
|
+
attr_accessor :states, :notes
|
30
|
+
attr_writer :name
|
31
|
+
|
30
32
|
def initialize
|
31
33
|
@name = nil
|
32
34
|
@states = {}
|
@@ -39,7 +41,7 @@ class NexusParser
|
|
39
41
|
:name => ''
|
40
42
|
}.merge!(options)
|
41
43
|
return false if !@opt[:label]
|
42
|
-
|
44
|
+
|
43
45
|
@states.update(@opt[:label] => ChrState.new(@opt[:name]))
|
44
46
|
end
|
45
47
|
|
@@ -49,7 +51,7 @@ class NexusParser
|
|
49
51
|
end
|
50
52
|
|
51
53
|
def name
|
52
|
-
((@name == "") || (@name
|
54
|
+
((@name == "") || (@name.nil?)) ? "Undefined" : @name
|
53
55
|
end
|
54
56
|
end
|
55
57
|
|
@@ -70,17 +72,20 @@ class NexusParser
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
class Coding
|
74
|
-
# unfortunately we need this for notes
|
75
|
-
attr_accessor :
|
75
|
+
class Coding
|
76
|
+
# unfortunately we need this for notes
|
77
|
+
attr_accessor :notes
|
78
|
+
attr_writer :state
|
79
|
+
|
76
80
|
def initialize(options = {})
|
77
81
|
@states = options[:states]
|
78
|
-
@notes = []
|
82
|
+
@notes = []
|
79
83
|
end
|
80
84
|
|
81
85
|
def states
|
82
|
-
@states.class == Array ? @states
|
86
|
+
@states.class == Array ? @states : [@states]
|
83
87
|
end
|
88
|
+
|
84
89
|
end
|
85
90
|
|
86
91
|
class Note
|
@@ -98,7 +103,7 @@ class NexusParser
|
|
98
103
|
else
|
99
104
|
n = 'No text recovered, possible parsing error.'
|
100
105
|
end
|
101
|
-
|
106
|
+
|
102
107
|
# THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
|
103
108
|
if n[0..2] =~ /\A\s*\(\s*CM\s*/i
|
104
109
|
n.strip!
|
@@ -107,7 +112,7 @@ class NexusParser
|
|
107
112
|
n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
|
108
113
|
n.strip!
|
109
114
|
n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
|
110
|
-
n = n[1..-2] if n[0..0] == '"'
|
115
|
+
n = n[1..-2] if n[0..0] == '"'
|
111
116
|
end
|
112
117
|
n.strip
|
113
118
|
end
|
@@ -120,11 +125,11 @@ end
|
|
120
125
|
class Builder
|
121
126
|
|
122
127
|
def initialize
|
123
|
-
@nf = NexusParser.new
|
128
|
+
@nf = NexusParser.new
|
124
129
|
end
|
125
130
|
|
126
131
|
def stub_taxon
|
127
|
-
@nf.taxa.push(NexusParser::Taxon.new)
|
132
|
+
@nf.taxa.push(NexusParser::Taxon.new)
|
128
133
|
return @nf.taxa.size
|
129
134
|
end
|
130
135
|
|
@@ -138,11 +143,11 @@ class Builder
|
|
138
143
|
@nf.characters.each_with_index do |c, i|
|
139
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
140
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
141
|
-
|
142
|
-
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
146
|
+
|
147
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
143
148
|
|
144
149
|
existing_states = @nf.characters[i].state_labels
|
145
|
-
|
150
|
+
|
146
151
|
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
147
152
|
new_states.delete("?") # we don't add this to the db
|
148
153
|
new_states = new_states - existing_states
|
@@ -160,12 +165,12 @@ class Builder
|
|
160
165
|
end
|
161
166
|
@nf.vars.update(hash)
|
162
167
|
end
|
163
|
-
|
168
|
+
|
164
169
|
def update_taxon(options = {})
|
165
170
|
@opt = {
|
166
171
|
:name => ''
|
167
172
|
}.merge!(options)
|
168
|
-
return false if !@opt[:index]
|
173
|
+
return false if !@opt[:index]
|
169
174
|
(@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
|
170
175
|
end
|
171
176
|
|
@@ -177,29 +182,29 @@ class Builder
|
|
177
182
|
return false if !@opt[:index]
|
178
183
|
|
179
184
|
@index = @opt[:index].to_i
|
180
|
-
|
185
|
+
|
181
186
|
# need to create the characters
|
182
|
-
|
187
|
+
|
183
188
|
raise(NexusParser::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
184
189
|
|
185
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
186
|
-
|
191
|
+
|
187
192
|
@opt.delete(:index)
|
188
193
|
@opt.delete(:name)
|
189
|
-
|
194
|
+
|
190
195
|
# the rest have states
|
191
196
|
@opt.keys.each do |k|
|
192
|
-
|
197
|
+
|
193
198
|
if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
|
194
|
-
|
199
|
+
|
195
200
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
196
201
|
update_state(@index, :index => k, :name => @opt[k])
|
197
|
-
|
202
|
+
|
198
203
|
else # doesn't, create it
|
199
204
|
@nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
|
200
205
|
end
|
201
206
|
end
|
202
|
-
|
207
|
+
|
203
208
|
end
|
204
209
|
|
205
210
|
def update_state(chr_index, options = {})
|
@@ -218,11 +223,11 @@ class Builder
|
|
218
223
|
case @opt[:type]
|
219
224
|
|
220
225
|
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
221
|
-
when 'TEXT' # a footnote
|
226
|
+
when 'TEXT' # a footnote
|
222
227
|
if @opt[:file]
|
223
228
|
@nf.notes << NexusParser::Note.new(@opt)
|
224
|
-
|
225
|
-
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
229
|
+
|
230
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
226
231
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
227
232
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
228
233
|
|
@@ -230,7 +235,7 @@ class Builder
|
|
230
235
|
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusParser::Note.new(@opt)
|
231
236
|
|
232
237
|
elsif @opt[:character] && !@opt[:taxon]
|
233
|
-
|
238
|
+
|
234
239
|
@nf.characters[@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
235
240
|
end
|
236
241
|
|
@@ -244,7 +249,7 @@ class Builder
|
|
244
249
|
@nf.characters[@opt[:c].to_i - 1].notes << NexusParser::Note.new(@opt)
|
245
250
|
end
|
246
251
|
end
|
247
|
-
|
252
|
+
|
248
253
|
end
|
249
254
|
|
250
255
|
def nexus_file
|
@@ -264,7 +269,7 @@ end # end module
|
|
264
269
|
def parse_nexus_file(input)
|
265
270
|
@input = input
|
266
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
267
|
-
# quickly peek at the input, does this look like a Nexus file?
|
272
|
+
# quickly peek at the input, does this look like a Nexus file?
|
268
273
|
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
269
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
270
275
|
end
|
@@ -272,7 +277,7 @@ def parse_nexus_file(input)
|
|
272
277
|
builder = NexusParser::Builder.new
|
273
278
|
lexer = NexusParser::Lexer.new(@input)
|
274
279
|
NexusParser::Parser.new(lexer, builder).parse_file
|
275
|
-
|
276
|
-
return builder.nexus_file
|
280
|
+
|
281
|
+
return builder.nexus_file
|
277
282
|
end
|
278
283
|
|