nexus_parser 1.1.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/LICENSE +25 -17
- data/{README.rdoc → README.md} +7 -5
- data/Rakefile +5 -21
- data/lib/{lexer.rb → nexus_parser/lexer.rb} +17 -17
- data/lib/{parser.rb → nexus_parser/parser.rb} +2 -2
- data/lib/{tokens.rb → nexus_parser/tokens.rb} +36 -36
- data/lib/nexus_parser/version.rb +5 -0
- data/lib/nexus_parser.rb +44 -39
- data/nexus_parser.gemspec +49 -50
- data/test/MX_test_03.nex +3 -3
- data/test/test_nexus_parser.rb +134 -138
- metadata +111 -63
- data/MIT-LICENSE +0 -20
- data/README +0 -13
- data/VERSION +0 -1
- data/init.rb +0 -1
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 93e9b7ced7b53e19e2cd33c72c433736058c4adcf7555dc6b43635c08f5dcea7
|
4
|
+
data.tar.gz: 4b9bce06037be960e29e1e4f02bbeef286f47a35e977384a5d451e1f0c3e3f91
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2297128f4ad470e7de1760996aaa712ed8bc691fee0a3cc8a59de54cf1b40bcc9d1cd6478049ae14f93b90ea208a93b6bc4872b76ecf8fc99221ea1b583f954d
|
7
|
+
data.tar.gz: 19615c9fcdc2469bf32e681ac3b81b1a4ca8f468a0154671573b73c0e54f247d78b075ee3a2a1d63a4ba21c0cd657dfe656f8140b8feed07ca54d93fadf1772a
|
data/.gitignore
CHANGED
data/LICENSE
CHANGED
@@ -1,20 +1,28 @@
|
|
1
|
-
Copyright (c)
|
1
|
+
Copyright (c) 2008- Matt Yoder. All rights reserved.
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
-
permit persons to whom the Software is furnished to do so, subject to
|
9
|
-
the following conditions:
|
3
|
+
Developed by: Matt Yoder, Species File Group, and Collaborators
|
4
|
+
University of Illinois
|
5
|
+
https://speciesfilegroup.org
|
10
6
|
|
11
|
-
|
12
|
-
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
8
|
+
this software and associated documentation files (the "Software"), to deal with
|
9
|
+
the Software without restriction, including without limitation the rights
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
11
|
+
of the Software, and to permit persons to whom the Software is furnished to
|
12
|
+
do so, subject to the following conditions:
|
13
|
+
* Redistributions of source code must retain the above copyright notice,
|
14
|
+
this list of conditions and the following disclaimers.
|
15
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
16
|
+
this list of conditions and the following disclaimers in the documentation
|
17
|
+
and/or other materials provided with the distribution.
|
18
|
+
* Neither the names of Species File Group, University of Illinois,
|
19
|
+
nor the names of its contributors may be used to endorse or promote products
|
20
|
+
derived from this Software without specific prior written permission.
|
13
21
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
OF
|
20
|
-
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
25
|
+
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
28
|
+
SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,8 +1,10 @@
|
|
1
|
-
|
1
|
+
# nexus_parser
|
2
|
+
|
3
|
+
A Ruby lexer/parser for [nexus](https://en.wikipedia.org/wiki/Nexus_file) files, as used in phylogenetic analysis in taxonomy.
|
2
4
|
|
3
5
|
For now, see the test files for usage; there are lots of examples there.
|
4
6
|
|
5
|
-
|
7
|
+
## Note on Patches/Pull Requests
|
6
8
|
|
7
9
|
* Fork the project.
|
8
10
|
* Make your feature addition or bug fix.
|
@@ -10,8 +12,8 @@ See the test files for usage for now, lots of examples there.
|
|
10
12
|
future version unintentionally.
|
11
13
|
* Commit, do not mess with rakefile, version, or history.
|
12
14
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
-
*
|
15
|
+
* Make a pull request.
|
14
16
|
|
15
|
-
|
17
|
+
## License
|
16
18
|
|
17
|
-
|
19
|
+
`nexus_parser` is open source and is now available under the [University of Illinois/NCSA Open Source License](https://en.wikipedia.org/wiki/University_of_Illinois/NCSA_Open_Source_License).
|
data/Rakefile
CHANGED
@@ -1,23 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
1
4
|
require 'rubygems'
|
2
5
|
require 'rake'
|
3
6
|
|
4
|
-
begin
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "nexus_parser"
|
8
|
-
gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
|
9
|
-
gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
|
10
|
-
gem.email = "diapriid@gmail.com"
|
11
|
-
gem.homepage = "http://github.com/mjy/nexus_parser"
|
12
|
-
gem.authors = ["mjy"]
|
13
|
-
# gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
end
|
16
|
-
Jeweler::GemcutterTasks.new
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
7
|
require 'rake/testtask'
|
22
8
|
Rake::TestTask.new(:test) do |test|
|
23
9
|
test.libs << 'lib' << 'test'
|
@@ -38,13 +24,11 @@ rescue LoadError
|
|
38
24
|
end
|
39
25
|
end
|
40
26
|
|
41
|
-
task :test => :check_dependencies
|
42
|
-
|
43
27
|
task :default => :test
|
44
28
|
|
45
|
-
require '
|
29
|
+
require 'rdoc/task'
|
46
30
|
Rake::RDocTask.new do |rdoc|
|
47
|
-
version =
|
31
|
+
version = NexusParser::VERSION
|
48
32
|
|
49
33
|
rdoc.rdoc_dir = 'rdoc'
|
50
34
|
rdoc.title = "nexus_parser #{version}"
|
@@ -8,8 +8,8 @@ class NexusParser::Lexer
|
|
8
8
|
@input.gsub!(/\x0D/,"") # get rid of possible dos carrige returns
|
9
9
|
@next_token = nil
|
10
10
|
end
|
11
|
-
|
12
|
-
# checks whether the next token is of the specified class.
|
11
|
+
|
12
|
+
# checks whether the next token is of the specified class.
|
13
13
|
def peek(token_class)
|
14
14
|
token = read_next_token(token_class)
|
15
15
|
return token.class == token_class
|
@@ -21,18 +21,18 @@ class NexusParser::Lexer
|
|
21
21
|
token = read_next_token(token_class)
|
22
22
|
@next_token = nil
|
23
23
|
if token.class != token_class
|
24
|
-
|
24
|
+
raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
|
25
25
|
else
|
26
|
-
|
26
|
+
return token
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
private
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
# read (and store) the next token from the input, if it has not already been read.
|
32
|
+
def read_next_token(token_class)
|
33
|
+
if @next_token
|
34
|
+
return @next_token
|
35
|
+
else
|
36
36
|
# check for a match on the specified class first
|
37
37
|
if match(token_class)
|
38
38
|
return @next_token
|
@@ -42,19 +42,19 @@ class NexusParser::Lexer
|
|
42
42
|
return @next_token if match(t)
|
43
43
|
}
|
44
44
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
# no match, either end of string or lex-error
|
46
|
+
if @input != ''
|
47
|
+
raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
48
|
+
else
|
49
|
+
return nil
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
def match(token_class)
|
55
55
|
if (m = token_class.regexp.match(@input))
|
56
56
|
@next_token = token_class.new(m[1])
|
57
|
-
|
57
|
+
@input = @input[m.end(0)..-1]
|
58
58
|
return true
|
59
59
|
else
|
60
60
|
return false
|
@@ -8,7 +8,7 @@ class NexusParser::Parser
|
|
8
8
|
|
9
9
|
def parse_file
|
10
10
|
# nf = @builder.new_nexus_file # create new local NexusParser instance, nf
|
11
|
-
blks = []
|
11
|
+
# blks = []
|
12
12
|
@lexer.pop(NexusParser::Tokens::NexusStart)
|
13
13
|
|
14
14
|
while @lexer.peek(NexusParser::Tokens::BeginBlk)
|
@@ -215,7 +215,7 @@ class NexusParser::Parser
|
|
215
215
|
|
216
216
|
opts.update({:index => (index - 1), :name => name})
|
217
217
|
|
218
|
-
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
|
218
|
+
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
|
219
219
|
@builder.update_chr(opts)
|
220
220
|
end
|
221
221
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module NexusParser::Tokens
|
2
2
|
|
3
|
-
class Token
|
3
|
+
class Token
|
4
4
|
# this allows access to the class attribute regexp, without using a class variable
|
5
5
|
class << self; attr_reader :regexp; end
|
6
|
-
attr_reader :value
|
6
|
+
attr_reader :value
|
7
7
|
def initialize(str)
|
8
8
|
@value = str
|
9
9
|
end
|
@@ -11,7 +11,7 @@ module NexusParser::Tokens
|
|
11
11
|
|
12
12
|
# in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
|
13
13
|
# moving along popping off
|
14
|
-
|
14
|
+
|
15
15
|
class NexusStart < Token
|
16
16
|
@regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
|
17
17
|
end
|
@@ -21,7 +21,7 @@ module NexusParser::Tokens
|
|
21
21
|
# @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
|
22
22
|
# def initialize(str)
|
23
23
|
# str = str[1..-2] # strip the []
|
24
|
-
# str.strip!
|
24
|
+
# str.strip!
|
25
25
|
# @value = str
|
26
26
|
# end
|
27
27
|
# end
|
@@ -31,20 +31,20 @@ module NexusParser::Tokens
|
|
31
31
|
end
|
32
32
|
|
33
33
|
class EndBlk < Token
|
34
|
-
@regexp = Regexp.new(/\A\s*([\s
|
34
|
+
@regexp = Regexp.new(/\A\s*([\s]*End[\s]*;[\s]*)/i)
|
35
35
|
end
|
36
36
|
|
37
|
-
# label
|
37
|
+
# label
|
38
38
|
class AuthorsBlk < Token
|
39
|
-
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
39
|
+
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
40
40
|
end
|
41
41
|
|
42
|
-
# label
|
42
|
+
# label
|
43
43
|
class TaxaBlk < Token
|
44
44
|
@regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
|
45
45
|
end
|
46
46
|
|
47
|
-
# label
|
47
|
+
# label
|
48
48
|
class NotesBlk < Token
|
49
49
|
@regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
|
50
50
|
end
|
@@ -66,9 +66,9 @@ module NexusParser::Tokens
|
|
66
66
|
@regexp = Regexp.new(/\A\s*(format)\s*/i)
|
67
67
|
end
|
68
68
|
|
69
|
-
# label
|
69
|
+
# label
|
70
70
|
class Taxlabels < Token
|
71
|
-
|
71
|
+
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
72
72
|
end
|
73
73
|
|
74
74
|
# same as ID
|
@@ -77,8 +77,8 @@ module NexusParser::Tokens
|
|
77
77
|
def initialize(str)
|
78
78
|
str.strip!
|
79
79
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
80
|
-
str = str[1..-2] if str[0..0] == '"'
|
81
|
-
str.strip!
|
80
|
+
str = str[1..-2] if str[0..0] == '"'
|
81
|
+
str.strip!
|
82
82
|
@value = str
|
83
83
|
end
|
84
84
|
end
|
@@ -91,15 +91,15 @@ module NexusParser::Tokens
|
|
91
91
|
@regexp = Regexp.new(/\A\s*(link.*\s*;)\s*\n*/i)
|
92
92
|
end
|
93
93
|
|
94
|
-
# note we grab EOL and ; here
|
94
|
+
# note we grab EOL and ; here
|
95
95
|
class ValuePair < Token
|
96
|
-
@regexp = Regexp.new(/\A\s*([\w
|
96
|
+
@regexp = Regexp.new(/\A\s*([\w]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s;]+)))[\s;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
97
97
|
def initialize(str)
|
98
98
|
str.strip!
|
99
99
|
str = str.split(/=/)
|
100
100
|
str[1].strip!
|
101
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
102
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
101
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
102
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
103
103
|
@value = {str[0].strip.downcase.to_sym => str[1].strip}
|
104
104
|
end
|
105
105
|
end
|
@@ -110,10 +110,10 @@ module NexusParser::Tokens
|
|
110
110
|
|
111
111
|
class RowVec < Token
|
112
112
|
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
113
|
+
def initialize(str)
|
114
|
+
# meh! Ruby is simpler to read than Perl?
|
115
|
+
# handles both () and {} style multistates
|
116
|
+
s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
|
117
117
|
@value = s
|
118
118
|
end
|
119
119
|
end
|
@@ -131,37 +131,37 @@ module NexusParser::Tokens
|
|
131
131
|
end
|
132
132
|
|
133
133
|
# unparsed blocks
|
134
|
-
|
134
|
+
|
135
135
|
class TreesBlk < Token
|
136
136
|
@regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
|
137
137
|
end
|
138
138
|
|
139
139
|
class SetsBlk < Token
|
140
|
-
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
140
|
+
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
141
141
|
end
|
142
142
|
|
143
143
|
class MqCharModelsBlk < Token
|
144
|
-
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
144
|
+
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
145
145
|
end
|
146
146
|
|
147
147
|
class LabelsBlk < Token
|
148
|
-
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
148
|
+
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
149
149
|
end
|
150
150
|
|
151
151
|
class AssumptionsBlk < Token
|
152
|
-
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
152
|
+
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
153
153
|
end
|
154
154
|
|
155
155
|
class CodonsBlk < Token
|
156
|
-
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
156
|
+
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
157
157
|
end
|
158
158
|
|
159
159
|
class MesquiteBlk < Token
|
160
|
-
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
160
|
+
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
161
161
|
end
|
162
162
|
|
163
163
|
class BlkEnd < Token
|
164
|
-
@regexp = Regexp.new(/\A[\s
|
164
|
+
@regexp = Regexp.new(/\A[\s]*(END;)\s*/i)
|
165
165
|
end
|
166
166
|
|
167
167
|
class LBracket < Token
|
@@ -173,13 +173,13 @@ module NexusParser::Tokens
|
|
173
173
|
end
|
174
174
|
|
175
175
|
class LParen < Token
|
176
|
-
|
176
|
+
@regexp = Regexp.new('\A\s*(\()\s*')
|
177
177
|
end
|
178
178
|
|
179
179
|
class RParen < Token
|
180
180
|
@regexp = Regexp.new('\A\s*(\))\s*')
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
class Equals < Token
|
184
184
|
@regexp = Regexp.new('\A\s*(=)\s*')
|
185
185
|
end
|
@@ -192,7 +192,7 @@ module NexusParser::Tokens
|
|
192
192
|
class ID < Token
|
193
193
|
@regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
|
194
194
|
def initialize(str)
|
195
|
-
str.strip!
|
195
|
+
str.strip!
|
196
196
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
197
197
|
@value = str
|
198
198
|
end
|
@@ -241,7 +241,7 @@ module NexusParser::Tokens
|
|
241
241
|
NexusParser::Tokens::LabelsBlk,
|
242
242
|
NexusParser::Tokens::TaxaBlk,
|
243
243
|
NexusParser::Tokens::NotesBlk,
|
244
|
-
NexusParser::Tokens::Title,
|
244
|
+
NexusParser::Tokens::Title,
|
245
245
|
NexusParser::Tokens::Taxlabels,
|
246
246
|
NexusParser::Tokens::Dimensions,
|
247
247
|
NexusParser::Tokens::FileLbl,
|
@@ -263,12 +263,12 @@ module NexusParser::Tokens
|
|
263
263
|
NexusParser::Tokens::RParen,
|
264
264
|
NexusParser::Tokens::LBracket,
|
265
265
|
NexusParser::Tokens::RBracket,
|
266
|
-
NexusParser::Tokens::Label, # must be before RowVec
|
266
|
+
NexusParser::Tokens::Label, # must be before RowVec
|
267
267
|
NexusParser::Tokens::RowVec,
|
268
268
|
NexusParser::Tokens::LinkLine,
|
269
269
|
NexusParser::Tokens::ID # need to trash this
|
270
|
-
]
|
270
|
+
]
|
271
271
|
end
|
272
|
-
|
272
|
+
|
273
273
|
end
|
274
274
|
|
data/lib/nexus_parser.rb
CHANGED
@@ -8,12 +8,12 @@
|
|
8
8
|
|
9
9
|
module NexusParser
|
10
10
|
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
13
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
14
|
-
|
15
|
-
class NexusParser
|
16
|
-
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'parser'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'lexer'))
|
14
|
+
|
15
|
+
class NexusParser
|
16
|
+
|
17
17
|
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
18
18
|
|
19
19
|
def initialize
|
@@ -26,7 +26,9 @@ class NexusParser
|
|
26
26
|
end
|
27
27
|
|
28
28
|
class Character
|
29
|
-
attr_accessor
|
29
|
+
attr_accessor :states, :notes
|
30
|
+
attr_writer :name
|
31
|
+
|
30
32
|
def initialize
|
31
33
|
@name = nil
|
32
34
|
@states = {}
|
@@ -39,7 +41,7 @@ class NexusParser
|
|
39
41
|
:name => ''
|
40
42
|
}.merge!(options)
|
41
43
|
return false if !@opt[:label]
|
42
|
-
|
44
|
+
|
43
45
|
@states.update(@opt[:label] => ChrState.new(@opt[:name]))
|
44
46
|
end
|
45
47
|
|
@@ -49,7 +51,7 @@ class NexusParser
|
|
49
51
|
end
|
50
52
|
|
51
53
|
def name
|
52
|
-
((@name == "") || (@name
|
54
|
+
((@name == "") || (@name.nil?)) ? "Undefined" : @name
|
53
55
|
end
|
54
56
|
end
|
55
57
|
|
@@ -70,17 +72,20 @@ class NexusParser
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
class Coding
|
74
|
-
# unfortunately we need this for notes
|
75
|
-
attr_accessor :
|
75
|
+
class Coding
|
76
|
+
# unfortunately we need this for notes
|
77
|
+
attr_accessor :notes
|
78
|
+
attr_writer :state
|
79
|
+
|
76
80
|
def initialize(options = {})
|
77
81
|
@states = options[:states]
|
78
|
-
@notes = []
|
82
|
+
@notes = []
|
79
83
|
end
|
80
84
|
|
81
85
|
def states
|
82
|
-
@states.class == Array ? @states
|
86
|
+
@states.class == Array ? @states : [@states]
|
83
87
|
end
|
88
|
+
|
84
89
|
end
|
85
90
|
|
86
91
|
class Note
|
@@ -98,7 +103,7 @@ class NexusParser
|
|
98
103
|
else
|
99
104
|
n = 'No text recovered, possible parsing error.'
|
100
105
|
end
|
101
|
-
|
106
|
+
|
102
107
|
# THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
|
103
108
|
if n[0..2] =~ /\A\s*\(\s*CM\s*/i
|
104
109
|
n.strip!
|
@@ -107,7 +112,7 @@ class NexusParser
|
|
107
112
|
n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
|
108
113
|
n.strip!
|
109
114
|
n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
|
110
|
-
n = n[1..-2] if n[0..0] == '"'
|
115
|
+
n = n[1..-2] if n[0..0] == '"'
|
111
116
|
end
|
112
117
|
n.strip
|
113
118
|
end
|
@@ -120,11 +125,11 @@ end
|
|
120
125
|
class Builder
|
121
126
|
|
122
127
|
def initialize
|
123
|
-
@nf = NexusParser.new
|
128
|
+
@nf = NexusParser.new
|
124
129
|
end
|
125
130
|
|
126
131
|
def stub_taxon
|
127
|
-
@nf.taxa.push(NexusParser::Taxon.new)
|
132
|
+
@nf.taxa.push(NexusParser::Taxon.new)
|
128
133
|
return @nf.taxa.size
|
129
134
|
end
|
130
135
|
|
@@ -138,11 +143,11 @@ class Builder
|
|
138
143
|
@nf.characters.each_with_index do |c, i|
|
139
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
140
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
141
|
-
|
142
|
-
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
146
|
+
|
147
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in the description) !!
|
143
148
|
|
144
149
|
existing_states = @nf.characters[i].state_labels
|
145
|
-
|
150
|
+
|
146
151
|
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
147
152
|
new_states.delete("?") # we don't add this to the db
|
148
153
|
new_states = new_states - existing_states
|
@@ -160,12 +165,12 @@ class Builder
|
|
160
165
|
end
|
161
166
|
@nf.vars.update(hash)
|
162
167
|
end
|
163
|
-
|
168
|
+
|
164
169
|
def update_taxon(options = {})
|
165
170
|
@opt = {
|
166
171
|
:name => ''
|
167
172
|
}.merge!(options)
|
168
|
-
return false if !@opt[:index]
|
173
|
+
return false if !@opt[:index]
|
169
174
|
(@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
|
170
175
|
end
|
171
176
|
|
@@ -177,29 +182,29 @@ class Builder
|
|
177
182
|
return false if !@opt[:index]
|
178
183
|
|
179
184
|
@index = @opt[:index].to_i
|
180
|
-
|
185
|
+
|
181
186
|
# need to create the characters
|
182
|
-
|
187
|
+
|
183
188
|
raise(NexusParser::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
184
189
|
|
185
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
186
|
-
|
191
|
+
|
187
192
|
@opt.delete(:index)
|
188
193
|
@opt.delete(:name)
|
189
|
-
|
194
|
+
|
190
195
|
# the rest have states
|
191
196
|
@opt.keys.each do |k|
|
192
|
-
|
197
|
+
|
193
198
|
if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
|
194
|
-
|
199
|
+
|
195
200
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
196
201
|
update_state(@index, :index => k, :name => @opt[k])
|
197
|
-
|
202
|
+
|
198
203
|
else # doesn't, create it
|
199
204
|
@nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
|
200
205
|
end
|
201
206
|
end
|
202
|
-
|
207
|
+
|
203
208
|
end
|
204
209
|
|
205
210
|
def update_state(chr_index, options = {})
|
@@ -218,11 +223,11 @@ class Builder
|
|
218
223
|
case @opt[:type]
|
219
224
|
|
220
225
|
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
221
|
-
when 'TEXT' # a footnote
|
226
|
+
when 'TEXT' # a footnote
|
222
227
|
if @opt[:file]
|
223
228
|
@nf.notes << NexusParser::Note.new(@opt)
|
224
|
-
|
225
|
-
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
229
|
+
|
230
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
226
231
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
227
232
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
228
233
|
|
@@ -230,7 +235,7 @@ class Builder
|
|
230
235
|
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusParser::Note.new(@opt)
|
231
236
|
|
232
237
|
elsif @opt[:character] && !@opt[:taxon]
|
233
|
-
|
238
|
+
|
234
239
|
@nf.characters[@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
235
240
|
end
|
236
241
|
|
@@ -244,7 +249,7 @@ class Builder
|
|
244
249
|
@nf.characters[@opt[:c].to_i - 1].notes << NexusParser::Note.new(@opt)
|
245
250
|
end
|
246
251
|
end
|
247
|
-
|
252
|
+
|
248
253
|
end
|
249
254
|
|
250
255
|
def nexus_file
|
@@ -264,7 +269,7 @@ end # end module
|
|
264
269
|
def parse_nexus_file(input)
|
265
270
|
@input = input
|
266
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
267
|
-
# quickly peek at the input, does this look like a Nexus file?
|
272
|
+
# quickly peek at the input, does this look like a Nexus file?
|
268
273
|
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
269
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
270
275
|
end
|
@@ -272,7 +277,7 @@ def parse_nexus_file(input)
|
|
272
277
|
builder = NexusParser::Builder.new
|
273
278
|
lexer = NexusParser::Lexer.new(@input)
|
274
279
|
NexusParser::Parser.new(lexer, builder).parse_file
|
275
|
-
|
276
|
-
return builder.nexus_file
|
280
|
+
|
281
|
+
return builder.nexus_file
|
277
282
|
end
|
278
283
|
|