nexus_parser 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/LICENSE +25 -17
- data/{README.rdoc → README.md} +7 -5
- data/Rakefile +5 -21
- data/lib/{lexer.rb → nexus_parser/lexer.rb} +17 -17
- data/lib/{parser.rb → nexus_parser/parser.rb} +5 -3
- data/lib/{tokens.rb → nexus_parser/tokens.rb} +41 -36
- data/lib/nexus_parser/version.rb +5 -0
- data/lib/nexus_parser.rb +44 -41
- data/nexus_parser.gemspec +49 -51
- data/test/MX_test_03.nex +3 -3
- data/test/test_nexus_parser.rb +138 -133
- metadata +111 -58
- data/MIT-LICENSE +0 -20
- data/README +0 -13
- data/VERSION +0 -1
- data/init.rb +0 -1
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 93e9b7ced7b53e19e2cd33c72c433736058c4adcf7555dc6b43635c08f5dcea7
|
|
4
|
+
data.tar.gz: 4b9bce06037be960e29e1e4f02bbeef286f47a35e977384a5d451e1f0c3e3f91
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 2297128f4ad470e7de1760996aaa712ed8bc691fee0a3cc8a59de54cf1b40bcc9d1cd6478049ae14f93b90ea208a93b6bc4872b76ecf8fc99221ea1b583f954d
|
|
7
|
+
data.tar.gz: 19615c9fcdc2469bf32e681ac3b81b1a4ca8f468a0154671573b73c0e54f247d78b075ee3a2a1d63a4ba21c0cd657dfe656f8140b8feed07ca54d93fadf1772a
|
data/.gitignore
CHANGED
data/LICENSE
CHANGED
|
@@ -1,20 +1,28 @@
|
|
|
1
|
-
Copyright (c)
|
|
1
|
+
Copyright (c) 2008- Matt Yoder. All rights reserved.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
-
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
-
the following conditions:
|
|
3
|
+
Developed by: Matt Yoder, Species File Group, and Collaborators
|
|
4
|
+
University of Illinois
|
|
5
|
+
https://speciesfilegroup.org
|
|
10
6
|
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
8
|
+
this software and associated documentation files (the "Software"), to deal with
|
|
9
|
+
the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
11
|
+
of the Software, and to permit persons to whom the Software is furnished to
|
|
12
|
+
do so, subject to the following conditions:
|
|
13
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
14
|
+
this list of conditions and the following disclaimers.
|
|
15
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
16
|
+
this list of conditions and the following disclaimers in the documentation
|
|
17
|
+
and/or other materials provided with the distribution.
|
|
18
|
+
* Neither the names of Matt Yoder, Species File Group, University of Illinois,
|
|
19
|
+
nor the names of its contributors may be used to endorse or promote products
|
|
20
|
+
derived from this Software without specific prior written permission.
|
|
13
21
|
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
OF
|
|
20
|
-
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
|
28
|
+
SOFTWARE.
|
data/{README.rdoc → README.md}
RENAMED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
# nexus_parser
|
|
2
|
+
|
|
3
|
+
A Ruby lexer/parser for [nexus](https://en.wikipedia.org/wiki/Nexus_file) files, as used in phylogenetic analysis in taxonomy.
|
|
2
4
|
|
|
3
5
|
See the test files for usage for now, lots of examples there.
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
## Note on Patches/Pull Requests
|
|
6
8
|
|
|
7
9
|
* Fork the project.
|
|
8
10
|
* Make your feature addition or bug fix.
|
|
@@ -10,8 +12,8 @@ See the test files for usage for now, lots of examples there.
|
|
|
10
12
|
future version unintentionally.
|
|
11
13
|
* Commit, do not mess with rakefile, version, or history.
|
|
12
14
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
|
13
|
-
*
|
|
15
|
+
* Make a pull request.
|
|
14
16
|
|
|
15
|
-
|
|
17
|
+
## License
|
|
16
18
|
|
|
17
|
-
|
|
19
|
+
`nexus_parser` is open source and is now available under the [University of Illinois/NCSA Open Source License](https://en.wikipedia.org/wiki/University_of_Illinois/NCSA_Open_Source_License).
|
data/Rakefile
CHANGED
|
@@ -1,23 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
1
4
|
require 'rubygems'
|
|
2
5
|
require 'rake'
|
|
3
6
|
|
|
4
|
-
begin
|
|
5
|
-
require 'jeweler'
|
|
6
|
-
Jeweler::Tasks.new do |gem|
|
|
7
|
-
gem.name = "nexus_parser"
|
|
8
|
-
gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
|
|
9
|
-
gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
|
|
10
|
-
gem.email = "diapriid@gmail.com"
|
|
11
|
-
gem.homepage = "http://github.com/mjy/nexus_parser"
|
|
12
|
-
gem.authors = ["mjy"]
|
|
13
|
-
# gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
|
14
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
15
|
-
end
|
|
16
|
-
Jeweler::GemcutterTasks.new
|
|
17
|
-
rescue LoadError
|
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
|
19
|
-
end
|
|
20
|
-
|
|
21
7
|
require 'rake/testtask'
|
|
22
8
|
Rake::TestTask.new(:test) do |test|
|
|
23
9
|
test.libs << 'lib' << 'test'
|
|
@@ -38,13 +24,11 @@ rescue LoadError
|
|
|
38
24
|
end
|
|
39
25
|
end
|
|
40
26
|
|
|
41
|
-
task :test => :check_dependencies
|
|
42
|
-
|
|
43
27
|
task :default => :test
|
|
44
28
|
|
|
45
|
-
require '
|
|
29
|
+
require 'rdoc/task'
|
|
46
30
|
Rake::RDocTask.new do |rdoc|
|
|
47
|
-
version =
|
|
31
|
+
version = NexusParser::VERSION
|
|
48
32
|
|
|
49
33
|
rdoc.rdoc_dir = 'rdoc'
|
|
50
34
|
rdoc.title = "nexus_parser #{version}"
|
|
@@ -8,8 +8,8 @@ class NexusParser::Lexer
|
|
|
8
8
|
@input.gsub!(/\x0D/,"") # get rid of possible DOS carriage returns
|
|
9
9
|
@next_token = nil
|
|
10
10
|
end
|
|
11
|
-
|
|
12
|
-
# checks whether the next token is of the specified class.
|
|
11
|
+
|
|
12
|
+
# checks whether the next token is of the specified class.
|
|
13
13
|
def peek(token_class)
|
|
14
14
|
token = read_next_token(token_class)
|
|
15
15
|
return token.class == token_class
|
|
@@ -21,18 +21,18 @@ class NexusParser::Lexer
|
|
|
21
21
|
token = read_next_token(token_class)
|
|
22
22
|
@next_token = nil
|
|
23
23
|
if token.class != token_class
|
|
24
|
-
|
|
24
|
+
raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
|
|
25
25
|
else
|
|
26
|
-
|
|
26
|
+
return token
|
|
27
27
|
end
|
|
28
28
|
end
|
|
29
|
-
|
|
29
|
+
|
|
30
30
|
private
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
31
|
+
# read (and store) the next token from the input, if it has not already been read.
|
|
32
|
+
def read_next_token(token_class)
|
|
33
|
+
if @next_token
|
|
34
|
+
return @next_token
|
|
35
|
+
else
|
|
36
36
|
# check for a match on the specified class first
|
|
37
37
|
if match(token_class)
|
|
38
38
|
return @next_token
|
|
@@ -42,19 +42,19 @@ class NexusParser::Lexer
|
|
|
42
42
|
return @next_token if match(t)
|
|
43
43
|
}
|
|
44
44
|
end
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
# no match, either end of string or lex-error
|
|
46
|
+
if @input != ''
|
|
47
|
+
raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
|
48
|
+
else
|
|
49
|
+
return nil
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
|
-
|
|
53
|
+
|
|
54
54
|
def match(token_class)
|
|
55
55
|
if (m = token_class.regexp.match(@input))
|
|
56
56
|
@next_token = token_class.new(m[1])
|
|
57
|
-
|
|
57
|
+
@input = @input[m.end(0)..-1]
|
|
58
58
|
return true
|
|
59
59
|
else
|
|
60
60
|
return false
|
|
@@ -8,7 +8,7 @@ class NexusParser::Parser
|
|
|
8
8
|
|
|
9
9
|
def parse_file
|
|
10
10
|
# nf = @builder.new_nexus_file # create new local NexusParser instance, nf
|
|
11
|
-
blks = []
|
|
11
|
+
# blks = []
|
|
12
12
|
@lexer.pop(NexusParser::Tokens::NexusStart)
|
|
13
13
|
|
|
14
14
|
while @lexer.peek(NexusParser::Tokens::BeginBlk)
|
|
@@ -127,7 +127,8 @@ class NexusParser::Parser
|
|
|
127
127
|
break
|
|
128
128
|
else
|
|
129
129
|
@lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title) # not used at present
|
|
130
|
-
|
|
130
|
+
@lexer.pop(NexusParser::Tokens::LinkLine) if @lexer.peek(NexusParser::Tokens::LinkLine) # trashing these for now
|
|
131
|
+
|
|
131
132
|
parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)
|
|
132
133
|
parse_format if @lexer.peek(NexusParser::Tokens::Format)
|
|
133
134
|
|
|
@@ -137,6 +138,7 @@ class NexusParser::Parser
|
|
|
137
138
|
|
|
138
139
|
# handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters block (sticks in an infinite loop right now)
|
|
139
140
|
|
|
141
|
+
|
|
140
142
|
@lexer.pop(NexusParser::Tokens::MesquiteIDs) if @lexer.peek(NexusParser::Tokens::MesquiteIDs) # trashing these for now
|
|
141
143
|
@lexer.pop(NexusParser::Tokens::MesquiteBlockID) if @lexer.peek(NexusParser::Tokens::MesquiteBlockID) # trashing these for now
|
|
142
144
|
|
|
@@ -213,7 +215,7 @@ class NexusParser::Parser
|
|
|
213
215
|
|
|
214
216
|
opts.update({:index => (index - 1), :name => name})
|
|
215
217
|
|
|
216
|
-
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
|
|
218
|
+
raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
|
|
217
219
|
@builder.update_chr(opts)
|
|
218
220
|
end
|
|
219
221
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
module NexusParser::Tokens
|
|
2
2
|
|
|
3
|
-
class Token
|
|
3
|
+
class Token
|
|
4
4
|
# this allows access to the class attribute regexp, without using a class variable
|
|
5
5
|
class << self; attr_reader :regexp; end
|
|
6
|
-
attr_reader :value
|
|
6
|
+
attr_reader :value
|
|
7
7
|
def initialize(str)
|
|
8
8
|
@value = str
|
|
9
9
|
end
|
|
@@ -11,7 +11,7 @@ module NexusParser::Tokens
|
|
|
11
11
|
|
|
12
12
|
# in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
|
|
13
13
|
# moving along popping off
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
class NexusStart < Token
|
|
16
16
|
@regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
|
|
17
17
|
end
|
|
@@ -21,7 +21,7 @@ module NexusParser::Tokens
|
|
|
21
21
|
# @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
|
|
22
22
|
# def initialize(str)
|
|
23
23
|
# str = str[1..-2] # strip the []
|
|
24
|
-
# str.strip!
|
|
24
|
+
# str.strip!
|
|
25
25
|
# @value = str
|
|
26
26
|
# end
|
|
27
27
|
# end
|
|
@@ -31,20 +31,20 @@ module NexusParser::Tokens
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
class EndBlk < Token
|
|
34
|
-
@regexp = Regexp.new(/\A\s*([\s
|
|
34
|
+
@regexp = Regexp.new(/\A\s*([\s]*End[\s]*;[\s]*)/i)
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
-
# label
|
|
37
|
+
# label
|
|
38
38
|
class AuthorsBlk < Token
|
|
39
|
-
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
|
39
|
+
@regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
# label
|
|
42
|
+
# label
|
|
43
43
|
class TaxaBlk < Token
|
|
44
44
|
@regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
|
|
45
45
|
end
|
|
46
46
|
|
|
47
|
-
# label
|
|
47
|
+
# label
|
|
48
48
|
class NotesBlk < Token
|
|
49
49
|
@regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
|
|
50
50
|
end
|
|
@@ -66,9 +66,9 @@ module NexusParser::Tokens
|
|
|
66
66
|
@regexp = Regexp.new(/\A\s*(format)\s*/i)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
-
# label
|
|
69
|
+
# label
|
|
70
70
|
class Taxlabels < Token
|
|
71
|
-
|
|
71
|
+
@regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
# same as ID
|
|
@@ -77,8 +77,8 @@ module NexusParser::Tokens
|
|
|
77
77
|
def initialize(str)
|
|
78
78
|
str.strip!
|
|
79
79
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
|
80
|
-
str = str[1..-2] if str[0..0] == '"'
|
|
81
|
-
str.strip!
|
|
80
|
+
str = str[1..-2] if str[0..0] == '"'
|
|
81
|
+
str.strip!
|
|
82
82
|
@value = str
|
|
83
83
|
end
|
|
84
84
|
end
|
|
@@ -87,15 +87,19 @@ module NexusParser::Tokens
|
|
|
87
87
|
@regexp = Regexp.new(/\A\s*(characters\s*;)\s*/i)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
class LinkLine < Token
|
|
91
|
+
@regexp = Regexp.new(/\A\s*(link.*\s*;)\s*\n*/i)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# note we grab EOL and ; here
|
|
91
95
|
class ValuePair < Token
|
|
92
|
-
@regexp = Regexp.new(/\A\s*([\w
|
|
96
|
+
@regexp = Regexp.new(/\A\s*([\w]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s;]+)))[\s;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
|
93
97
|
def initialize(str)
|
|
94
98
|
str.strip!
|
|
95
99
|
str = str.split(/=/)
|
|
96
100
|
str[1].strip!
|
|
97
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
|
98
|
-
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
|
101
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "'"
|
|
102
|
+
str[1] = str[1][1..-2] if str[1][0..0] == "\""
|
|
99
103
|
@value = {str[0].strip.downcase.to_sym => str[1].strip}
|
|
100
104
|
end
|
|
101
105
|
end
|
|
@@ -106,10 +110,10 @@ module NexusParser::Tokens
|
|
|
106
110
|
|
|
107
111
|
class RowVec < Token
|
|
108
112
|
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
+
def initialize(str)
|
|
114
|
+
# meh! Ruby is simpler to read than Perl?
|
|
115
|
+
# handles both () and {} style multistates
|
|
116
|
+
s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
|
|
113
117
|
@value = s
|
|
114
118
|
end
|
|
115
119
|
end
|
|
@@ -127,37 +131,37 @@ module NexusParser::Tokens
|
|
|
127
131
|
end
|
|
128
132
|
|
|
129
133
|
# unparsed blocks
|
|
130
|
-
|
|
134
|
+
|
|
131
135
|
class TreesBlk < Token
|
|
132
136
|
@regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
|
|
133
137
|
end
|
|
134
138
|
|
|
135
139
|
class SetsBlk < Token
|
|
136
|
-
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
|
140
|
+
@regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
|
|
137
141
|
end
|
|
138
142
|
|
|
139
143
|
class MqCharModelsBlk < Token
|
|
140
|
-
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
|
144
|
+
@regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
|
|
141
145
|
end
|
|
142
146
|
|
|
143
147
|
class LabelsBlk < Token
|
|
144
|
-
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
|
148
|
+
@regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
|
|
145
149
|
end
|
|
146
150
|
|
|
147
151
|
class AssumptionsBlk < Token
|
|
148
|
-
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
|
152
|
+
@regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
|
|
149
153
|
end
|
|
150
154
|
|
|
151
155
|
class CodonsBlk < Token
|
|
152
|
-
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
|
156
|
+
@regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
|
|
153
157
|
end
|
|
154
158
|
|
|
155
159
|
class MesquiteBlk < Token
|
|
156
|
-
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
|
160
|
+
@regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
|
|
157
161
|
end
|
|
158
162
|
|
|
159
163
|
class BlkEnd < Token
|
|
160
|
-
@regexp = Regexp.new(/\A[\s
|
|
164
|
+
@regexp = Regexp.new(/\A[\s]*(END;)\s*/i)
|
|
161
165
|
end
|
|
162
166
|
|
|
163
167
|
class LBracket < Token
|
|
@@ -169,13 +173,13 @@ module NexusParser::Tokens
|
|
|
169
173
|
end
|
|
170
174
|
|
|
171
175
|
class LParen < Token
|
|
172
|
-
|
|
176
|
+
@regexp = Regexp.new('\A\s*(\()\s*')
|
|
173
177
|
end
|
|
174
178
|
|
|
175
179
|
class RParen < Token
|
|
176
180
|
@regexp = Regexp.new('\A\s*(\))\s*')
|
|
177
181
|
end
|
|
178
|
-
|
|
182
|
+
|
|
179
183
|
class Equals < Token
|
|
180
184
|
@regexp = Regexp.new('\A\s*(=)\s*')
|
|
181
185
|
end
|
|
@@ -188,7 +192,7 @@ module NexusParser::Tokens
|
|
|
188
192
|
class ID < Token
|
|
189
193
|
@regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
|
|
190
194
|
def initialize(str)
|
|
191
|
-
str.strip!
|
|
195
|
+
str.strip!
|
|
192
196
|
str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
|
|
193
197
|
@value = str
|
|
194
198
|
end
|
|
@@ -237,7 +241,7 @@ module NexusParser::Tokens
|
|
|
237
241
|
NexusParser::Tokens::LabelsBlk,
|
|
238
242
|
NexusParser::Tokens::TaxaBlk,
|
|
239
243
|
NexusParser::Tokens::NotesBlk,
|
|
240
|
-
NexusParser::Tokens::Title,
|
|
244
|
+
NexusParser::Tokens::Title,
|
|
241
245
|
NexusParser::Tokens::Taxlabels,
|
|
242
246
|
NexusParser::Tokens::Dimensions,
|
|
243
247
|
NexusParser::Tokens::FileLbl,
|
|
@@ -259,11 +263,12 @@ module NexusParser::Tokens
|
|
|
259
263
|
NexusParser::Tokens::RParen,
|
|
260
264
|
NexusParser::Tokens::LBracket,
|
|
261
265
|
NexusParser::Tokens::RBracket,
|
|
262
|
-
NexusParser::Tokens::Label, # must be before RowVec
|
|
266
|
+
NexusParser::Tokens::Label, # must be before RowVec
|
|
263
267
|
NexusParser::Tokens::RowVec,
|
|
268
|
+
NexusParser::Tokens::LinkLine,
|
|
264
269
|
NexusParser::Tokens::ID # need to trash this
|
|
265
|
-
]
|
|
270
|
+
]
|
|
266
271
|
end
|
|
267
|
-
|
|
272
|
+
|
|
268
273
|
end
|
|
269
274
|
|
data/lib/nexus_parser.rb
CHANGED
|
@@ -8,12 +8,12 @@
|
|
|
8
8
|
|
|
9
9
|
module NexusParser
|
|
10
10
|
|
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
|
13
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
|
14
|
-
|
|
15
|
-
class NexusParser
|
|
16
|
-
|
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'tokens'))
|
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'parser'))
|
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'nexus_parser', 'lexer'))
|
|
14
|
+
|
|
15
|
+
class NexusParser
|
|
16
|
+
|
|
17
17
|
attr_accessor :taxa, :characters, :sets, :codings, :vars, :notes
|
|
18
18
|
|
|
19
19
|
def initialize
|
|
@@ -26,7 +26,9 @@ class NexusParser
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
class Character
|
|
29
|
-
attr_accessor
|
|
29
|
+
attr_accessor :states, :notes
|
|
30
|
+
attr_writer :name
|
|
31
|
+
|
|
30
32
|
def initialize
|
|
31
33
|
@name = nil
|
|
32
34
|
@states = {}
|
|
@@ -39,7 +41,7 @@ class NexusParser
|
|
|
39
41
|
:name => ''
|
|
40
42
|
}.merge!(options)
|
|
41
43
|
return false if !@opt[:label]
|
|
42
|
-
|
|
44
|
+
|
|
43
45
|
@states.update(@opt[:label] => ChrState.new(@opt[:name]))
|
|
44
46
|
end
|
|
45
47
|
|
|
@@ -49,7 +51,7 @@ class NexusParser
|
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
def name
|
|
52
|
-
((@name == "") || (@name
|
|
54
|
+
((@name == "") || (@name.nil?)) ? "Undefined" : @name
|
|
53
55
|
end
|
|
54
56
|
end
|
|
55
57
|
|
|
@@ -70,17 +72,20 @@ class NexusParser
|
|
|
70
72
|
end
|
|
71
73
|
end
|
|
72
74
|
|
|
73
|
-
class Coding
|
|
74
|
-
# unfortunately we need this for notes
|
|
75
|
-
attr_accessor :
|
|
75
|
+
class Coding
|
|
76
|
+
# unfortunately we need this for notes
|
|
77
|
+
attr_accessor :notes
|
|
78
|
+
attr_writer :state
|
|
79
|
+
|
|
76
80
|
def initialize(options = {})
|
|
77
81
|
@states = options[:states]
|
|
78
|
-
@notes = []
|
|
82
|
+
@notes = []
|
|
79
83
|
end
|
|
80
84
|
|
|
81
85
|
def states
|
|
82
|
-
@states.class == Array ? @states
|
|
86
|
+
@states.class == Array ? @states : [@states]
|
|
83
87
|
end
|
|
88
|
+
|
|
84
89
|
end
|
|
85
90
|
|
|
86
91
|
class Note
|
|
@@ -98,7 +103,7 @@ class NexusParser
|
|
|
98
103
|
else
|
|
99
104
|
n = 'No text recovered, possible parsing error.'
|
|
100
105
|
end
|
|
101
|
-
|
|
106
|
+
|
|
102
107
|
# THIS IS A HACK for handling the TF = (CM <note>) format, I assume there will be other params in the future beyond CM, at that point move processing to the parser
|
|
103
108
|
if n[0..2] =~ /\A\s*\(\s*CM\s*/i
|
|
104
109
|
n.strip!
|
|
@@ -107,7 +112,7 @@ class NexusParser
|
|
|
107
112
|
n = n[2..-1] if n[0..1].downcase == "cm" # strip CM
|
|
108
113
|
n.strip!
|
|
109
114
|
n = n[1..-2] if n[0..0] == "'" # get rid of quote marks
|
|
110
|
-
n = n[1..-2] if n[0..0] == '"'
|
|
115
|
+
n = n[1..-2] if n[0..0] == '"'
|
|
111
116
|
end
|
|
112
117
|
n.strip
|
|
113
118
|
end
|
|
@@ -120,11 +125,11 @@ end
|
|
|
120
125
|
class Builder
|
|
121
126
|
|
|
122
127
|
def initialize
|
|
123
|
-
@nf = NexusParser.new
|
|
128
|
+
@nf = NexusParser.new
|
|
124
129
|
end
|
|
125
130
|
|
|
126
131
|
def stub_taxon
|
|
127
|
-
@nf.taxa.push(NexusParser::Taxon.new)
|
|
132
|
+
@nf.taxa.push(NexusParser::Taxon.new)
|
|
128
133
|
return @nf.taxa.size
|
|
129
134
|
end
|
|
130
135
|
|
|
@@ -138,11 +143,11 @@ class Builder
|
|
|
138
143
|
@nf.characters.each_with_index do |c, i|
|
|
139
144
|
@nf.codings[taxon_index.to_i] = [] if !@nf.codings[taxon_index.to_i]
|
|
140
145
|
@nf.codings[taxon_index.to_i][i] = NexusParser::Coding.new(:states => rowvector[i])
|
|
141
|
-
|
|
142
|
-
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
|
146
|
+
|
|
147
|
+
# !! we must update states for a given character if the state isn't found (not all states are referenced in description !!
|
|
143
148
|
|
|
144
149
|
existing_states = @nf.characters[i].state_labels
|
|
145
|
-
|
|
150
|
+
|
|
146
151
|
new_states = rowvector[i].class == Array ? rowvector[i].collect{|s| s.to_s} : [rowvector[i].to_s]
|
|
147
152
|
new_states.delete("?") # we don't add this to the db
|
|
148
153
|
new_states = new_states - existing_states
|
|
@@ -160,14 +165,12 @@ class Builder
|
|
|
160
165
|
end
|
|
161
166
|
@nf.vars.update(hash)
|
|
162
167
|
end
|
|
163
|
-
|
|
168
|
+
|
|
164
169
|
def update_taxon(options = {})
|
|
165
|
-
|
|
166
170
|
@opt = {
|
|
167
171
|
:name => ''
|
|
168
172
|
}.merge!(options)
|
|
169
|
-
return false if !@opt[:index]
|
|
170
|
-
|
|
173
|
+
return false if !@opt[:index]
|
|
171
174
|
(@nf.taxa[@opt[:index]].name = @opt[:name]) if @opt[:name]
|
|
172
175
|
end
|
|
173
176
|
|
|
@@ -179,29 +182,29 @@ class Builder
|
|
|
179
182
|
return false if !@opt[:index]
|
|
180
183
|
|
|
181
184
|
@index = @opt[:index].to_i
|
|
182
|
-
|
|
185
|
+
|
|
183
186
|
# need to create the characters
|
|
184
|
-
|
|
187
|
+
|
|
185
188
|
raise(NexusParser::ParseError, "Can't update character of index #{@index}, it doesn't exist! This is a problem parsing the character state labels. Check the indices. It may be for this character \"#{@opt[:name]}\".") if !@nf.characters[@index]
|
|
186
189
|
|
|
187
190
|
(@nf.characters[@index].name = @opt[:name]) if @opt[:name]
|
|
188
|
-
|
|
191
|
+
|
|
189
192
|
@opt.delete(:index)
|
|
190
193
|
@opt.delete(:name)
|
|
191
|
-
|
|
194
|
+
|
|
192
195
|
# the rest have states
|
|
193
196
|
@opt.keys.each do |k|
|
|
194
|
-
|
|
197
|
+
|
|
195
198
|
if (@nf.characters[@index].states != {}) && @nf.characters[@index].states[k] # state exists
|
|
196
|
-
|
|
199
|
+
|
|
197
200
|
## !! ONLY HANDLES NAME, UPDATE TO HANDLE notes etc. when we get them ##
|
|
198
201
|
update_state(@index, :index => k, :name => @opt[k])
|
|
199
|
-
|
|
202
|
+
|
|
200
203
|
else # doesn't, create it
|
|
201
204
|
@nf.characters[@index].add_state(:label => k.to_s, :name => @opt[k])
|
|
202
205
|
end
|
|
203
206
|
end
|
|
204
|
-
|
|
207
|
+
|
|
205
208
|
end
|
|
206
209
|
|
|
207
210
|
def update_state(chr_index, options = {})
|
|
@@ -220,11 +223,11 @@ class Builder
|
|
|
220
223
|
case @opt[:type]
|
|
221
224
|
|
|
222
225
|
# Why does mesquite differentiate b/w footnotes and annotations?!, apparently same data structure?
|
|
223
|
-
when 'TEXT' # a footnote
|
|
226
|
+
when 'TEXT' # a footnote
|
|
224
227
|
if @opt[:file]
|
|
225
228
|
@nf.notes << NexusParser::Note.new(@opt)
|
|
226
|
-
|
|
227
|
-
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
|
229
|
+
|
|
230
|
+
elsif @opt[:taxon] && @opt[:character] # its a cell, parse this case
|
|
228
231
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes = [] if !@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes
|
|
229
232
|
@nf.codings[@opt[:taxon].to_i - 1][@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
230
233
|
|
|
@@ -232,7 +235,7 @@ class Builder
|
|
|
232
235
|
@nf.taxa[@opt[:taxon].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
233
236
|
|
|
234
237
|
elsif @opt[:character] && !@opt[:taxon]
|
|
235
|
-
|
|
238
|
+
|
|
236
239
|
@nf.characters[@opt[:character].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
237
240
|
end
|
|
238
241
|
|
|
@@ -246,7 +249,7 @@ class Builder
|
|
|
246
249
|
@nf.characters[@opt[:c].to_i - 1].notes << NexusParser::Note.new(@opt)
|
|
247
250
|
end
|
|
248
251
|
end
|
|
249
|
-
|
|
252
|
+
|
|
250
253
|
end
|
|
251
254
|
|
|
252
255
|
def nexus_file
|
|
@@ -266,7 +269,7 @@ end # end module
|
|
|
266
269
|
def parse_nexus_file(input)
|
|
267
270
|
@input = input
|
|
268
271
|
@input.gsub!(/\[[^\]]*\]/,'') # strip out all comments BEFORE we parse the file
|
|
269
|
-
# quickly peek at the input, does this look like a Nexus file?
|
|
272
|
+
# quickly peek at the input, does this look like a Nexus file?
|
|
270
273
|
if !(@input =~ /\#Nexus/i) || !(@input =~ /Begin/i) || !(@input =~ /Matrix/i) || !(@input =~ /end\;/i)
|
|
271
274
|
raise(NexusParser::ParseError, "File is missing at least some required headers, check formatting.", caller)
|
|
272
275
|
end
|
|
@@ -274,7 +277,7 @@ def parse_nexus_file(input)
|
|
|
274
277
|
builder = NexusParser::Builder.new
|
|
275
278
|
lexer = NexusParser::Lexer.new(@input)
|
|
276
279
|
NexusParser::Parser.new(lexer, builder).parse_file
|
|
277
|
-
|
|
278
|
-
return builder.nexus_file
|
|
280
|
+
|
|
281
|
+
return builder.nexus_file
|
|
279
282
|
end
|
|
280
283
|
|