nexus_parser 1.1.4 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: be7f8d6dc4a222f456df1bb18dc3d63182cfb83b88ee036c227a93883c5ff70a
4
+ data.tar.gz: 1ab8785c3ca791476efe19d290ef25f20dc790792cf82fdee4ab1a0cd7468347
5
+ SHA512:
6
+ metadata.gz: e2c206656a9c8a1760e158641923b47f789ef42156fd9486dd75f70f92db670f31308adf739355eca6192ae3c702f6868b04fcab8ab87e7e44590371b3838bf7
7
+ data.tar.gz: fb4a0f18b0430dc04aa4feebd9d9ea46fb91b8557bc61efe7d75d4ad4427da1a0fa9f0632a3afe074fe5e47c17b18f9cd6a58786833b6c07a7181f819cece0d8
data/.gitignore CHANGED
@@ -13,9 +13,21 @@ tmtags
13
13
  ## VIM
14
14
  *.swp
15
15
 
16
+ ## BYEBUG
17
+ .byebug_history
18
+
16
19
  ## PROJECT::GENERAL
17
20
  coverage
18
21
  rdoc
19
22
  pkg
20
23
 
24
+ /.bundle/
25
+ /.yardoc
26
+ /_yardoc/
27
+ /coverage/
28
+ /doc/
29
+ /pkg/
30
+ /spec/reports/
31
+ /tmp/
32
+
21
33
  ## PROJECT::SPECIFIC
data/LICENSE CHANGED
@@ -1,20 +1,28 @@
1
- Copyright (c) 2009 mjy
1
+ Copyright (c) 2008- Matt Yoder. All rights reserved.
2
2
 
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
3
+ Developed by: Matt Yoder, Species File Group, and Collaborators
4
+ University of Illinois
5
+ https://speciesfilegroup.org
10
6
 
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
8
+ this software and associated documentation files (the "Software"), to deal with
9
+ the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11
+ of the Software, and to permit persons to whom the Software is furnished to
12
+ do so, subject to the following conditions:
13
+ * Redistributions of source code must retain the above copyright notice,
14
+ this list of conditions and the following disclaimers.
15
+ * Redistributions in binary form must reproduce the above copyright notice,
16
+ this list of conditions and the following disclaimers in the documentation
17
+ and/or other materials provided with the distribution.
18
+ * Neither the names of Matt Yoder, Species File Group, University of Illinois,
19
+ nor the names of its contributors may be used to endorse or promote products
20
+ derived from this Software without specific prior written permission.
13
21
 
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
28
+ SOFTWARE.
@@ -1,8 +1,10 @@
1
- = nexus_parser
1
+ # nexus_parser
2
+
3
+ A Ruby lexer/parser for [nexus](https://en.wikipedia.org/wiki/Nexus_file) files, as used in phylogenetic analysis in taxonomy.
2
4
 
3
5
  See the test files for usage for now, lots of examples there.
4
6
 
5
- == Note on Patches/Pull Requests
7
+ ## Note on Patches/Pull Requests
6
8
 
7
9
  * Fork the project.
8
10
  * Make your feature addition or bug fix.
@@ -10,8 +12,8 @@ See the test files for usage for now, lots of examples there.
10
12
  future version unintentionally.
11
13
  * Commit, do not mess with rakefile, version, or history.
12
14
  (if you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull)
13
- * Send me a pull request. Bonus points for topic branches.
15
+ * Make a pull request.
14
16
 
15
- == Copyright
17
+ ## License
16
18
 
17
- Copyright (c) 2010 Matt Yoder. See LICENSE for details.
19
+ `nexus_parser` is open source and is now available under the [University of Illinois/NCSA Open Source License](https://en.wikipedia.org/wiki/University_of_Illinois/NCSA_Open_Source_License).
data/Rakefile CHANGED
@@ -1,23 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
1
4
  require 'rubygems'
2
5
  require 'rake'
3
6
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "nexus_parser"
8
- gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
9
- gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
10
- gem.email = "diapriid@gmail.com"
11
- gem.homepage = "http://github.com/mjy/nexus_parser"
12
- gem.authors = ["mjy"]
13
- # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
- end
20
-
21
7
  require 'rake/testtask'
22
8
  Rake::TestTask.new(:test) do |test|
23
9
  test.libs << 'lib' << 'test'
@@ -38,13 +24,11 @@ rescue LoadError
38
24
  end
39
25
  end
40
26
 
41
- task :test => :check_dependencies
42
-
43
27
  task :default => :test
44
28
 
45
- require 'rake/rdoctask'
29
+ require 'rdoc/task'
46
30
  Rake::RDocTask.new do |rdoc|
47
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
31
+ version = NexusParser::VERSION
48
32
 
49
33
  rdoc.rdoc_dir = 'rdoc'
50
34
  rdoc.title = "nexus_parser #{version}"
@@ -8,8 +8,8 @@ class NexusParser::Lexer
8
8
  @input.gsub!(/\x0D/,"") # get rid of possible DOS carriage returns
9
9
  @next_token = nil
10
10
  end
11
-
12
- # checks whether the next token is of the specified class.
11
+
12
+ # checks whether the next token is of the specified class.
13
13
  def peek(token_class)
14
14
  token = read_next_token(token_class)
15
15
  return token.class == token_class
@@ -21,18 +21,18 @@ class NexusParser::Lexer
21
21
  token = read_next_token(token_class)
22
22
  @next_token = nil
23
23
  if token.class != token_class
24
- raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
24
+ raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
25
25
  else
26
- return token
26
+ return token
27
27
  end
28
28
  end
29
-
29
+
30
30
  private
31
- # read (and store) the next token from the input, if it has not already been read.
32
- def read_next_token(token_class)
33
- if @next_token
34
- return @next_token
35
- else
31
+ # read (and store) the next token from the input, if it has not already been read.
32
+ def read_next_token(token_class)
33
+ if @next_token
34
+ return @next_token
35
+ else
36
36
  # check for a match on the specified class first
37
37
  if match(token_class)
38
38
  return @next_token
@@ -42,19 +42,19 @@ class NexusParser::Lexer
42
42
  return @next_token if match(t)
43
43
  }
44
44
  end
45
- # no match, either end of string or lex-error
46
- if @input != ''
47
- raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
48
- else
49
- return nil
45
+ # no match, either end of string or lex-error
46
+ if @input != ''
47
+ raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
48
+ else
49
+ return nil
50
50
  end
51
51
  end
52
52
  end
53
-
53
+
54
54
  def match(token_class)
55
55
  if (m = token_class.regexp.match(@input))
56
56
  @next_token = token_class.new(m[1])
57
- @input = @input[m.end(0)..-1]
57
+ @input = @input[m.end(0)..-1]
58
58
  return true
59
59
  else
60
60
  return false
@@ -8,7 +8,7 @@ class NexusParser::Parser
8
8
 
9
9
  def parse_file
10
10
  # nf = @builder.new_nexus_file # create new local NexusParser instance, nf
11
- blks = []
11
+ # blks = []
12
12
  @lexer.pop(NexusParser::Tokens::NexusStart)
13
13
 
14
14
  while @lexer.peek(NexusParser::Tokens::BeginBlk)
@@ -151,8 +151,10 @@ class NexusParser::Parser
151
151
  # prolly pop header then fuse with parse_dimensions
152
152
  def parse_format
153
153
  @lexer.pop(NexusParser::Tokens::Format)
154
- while @lexer.peek(NexusParser::Tokens::ValuePair)
155
- @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
154
+
155
+ while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
156
+ @lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
157
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
156
158
  end
157
159
 
158
160
  check_initialization_of_ntax_nchar
@@ -215,7 +217,7 @@ class NexusParser::Parser
215
217
 
216
218
  opts.update({:index => (index - 1), :name => name})
217
219
 
218
- raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
220
+ raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
219
221
  @builder.update_chr(opts)
220
222
  end
221
223
 
@@ -1,9 +1,11 @@
1
1
  module NexusParser::Tokens
2
2
 
3
- class Token
3
+ ENDBLKSTR = '(end|endblock)'.freeze
4
+
5
+ class Token
4
6
  # this allows access to the class attribute regexp, without using a class variable
5
7
  class << self; attr_reader :regexp; end
6
- attr_reader :value
8
+ attr_reader :value
7
9
  def initialize(str)
8
10
  @value = str
9
11
  end
@@ -11,7 +13,7 @@ module NexusParser::Tokens
11
13
 
12
14
  # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
13
15
  # moving along popping off
14
-
16
+
15
17
  class NexusStart < Token
16
18
  @regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
17
19
  end
@@ -21,7 +23,7 @@ module NexusParser::Tokens
21
23
  # @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
22
24
  # def initialize(str)
23
25
  # str = str[1..-2] # strip the []
24
- # str.strip!
26
+ # str.strip!
25
27
  # @value = str
26
28
  # end
27
29
  # end
@@ -31,20 +33,20 @@ module NexusParser::Tokens
31
33
  end
32
34
 
33
35
  class EndBlk < Token
34
- @regexp = Regexp.new(/\A\s*([\s\n]*End[\s\n]*;[\s\n]*)/i)
36
+ @regexp = Regexp.new(/\A\s*([\s]*#{ENDBLKSTR}[\s]*;[\s]*)/i)
35
37
  end
36
38
 
37
- # label
39
+ # label
38
40
  class AuthorsBlk < Token
39
- @regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
41
+ @regexp = Regexp.new(/\A\s*(Authors;.*?#{ENDBLKSTR};)\s*/im)
40
42
  end
41
43
 
42
- # label
44
+ # label
43
45
  class TaxaBlk < Token
44
46
  @regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
45
47
  end
46
48
 
47
- # label
49
+ # label
48
50
  class NotesBlk < Token
49
51
  @regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
50
52
  end
@@ -66,9 +68,14 @@ module NexusParser::Tokens
66
68
  @regexp = Regexp.new(/\A\s*(format)\s*/i)
67
69
  end
68
70
 
69
- # label
71
+ # TODO: Handled, but ignored
72
+ class RespectCase < Token
73
+ @regexp = Regexp.new(/\A\s*(respectcase)\s*/i)
74
+ end
75
+
76
+ # label
70
77
  class Taxlabels < Token
71
- @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
78
+ @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
72
79
  end
73
80
 
74
81
  # same as ID
@@ -77,8 +84,8 @@ module NexusParser::Tokens
77
84
  def initialize(str)
78
85
  str.strip!
79
86
  str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
80
- str = str[1..-2] if str[0..0] == '"'
81
- str.strip!
87
+ str = str[1..-2] if str[0..0] == '"'
88
+ str.strip!
82
89
  @value = str
83
90
  end
84
91
  end
@@ -91,15 +98,15 @@ module NexusParser::Tokens
91
98
  @regexp = Regexp.new(/\A\s*(link.*\s*;)\s*\n*/i)
92
99
  end
93
100
 
94
- # note we grab EOL and ; here
101
+ # note we grab EOL and ; here
95
102
  class ValuePair < Token
96
- @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
103
+ @regexp = Regexp.new(/\A\s*([\w]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s;]+)))[\s;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
97
104
  def initialize(str)
98
105
  str.strip!
99
106
  str = str.split(/=/)
100
107
  str[1].strip!
101
- str[1] = str[1][1..-2] if str[1][0..0] == "'"
102
- str[1] = str[1][1..-2] if str[1][0..0] == "\""
108
+ str[1] = str[1][1..-2] if str[1][0..0] == "'"
109
+ str[1] = str[1][1..-2] if str[1][0..0] == "\""
103
110
  @value = {str[0].strip.downcase.to_sym => str[1].strip}
104
111
  end
105
112
  end
@@ -110,10 +117,10 @@ module NexusParser::Tokens
110
117
 
111
118
  class RowVec < Token
112
119
  @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
113
- def initialize(str)
114
- # meh! Ruby is simpler to read than Perl?
115
- # handles both () and {} style multistates
116
- s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
120
+ def initialize(str)
121
+ # meh! Ruby is simpler to read than Perl?
122
+ # handles both () and {} style multistates
123
+ s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
117
124
  @value = s
118
125
  end
119
126
  end
@@ -131,37 +138,37 @@ module NexusParser::Tokens
131
138
  end
132
139
 
133
140
  # unparsed blocks
134
-
141
+
135
142
  class TreesBlk < Token
136
- @regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
143
+ @regexp = Regexp.new(/\A\s*(trees;.*?#{ENDBLKSTR};)\s*/im) # note the multi-line /m
137
144
  end
138
145
 
139
146
  class SetsBlk < Token
140
- @regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
147
+ @regexp = Regexp.new(/\A\s*(sets;.*?#{ENDBLKSTR};)\s*/im)
141
148
  end
142
149
 
143
150
  class MqCharModelsBlk < Token
144
- @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
151
+ @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?#{ENDBLKSTR};)\s*/im)
145
152
  end
146
153
 
147
154
  class LabelsBlk < Token
148
- @regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
155
+ @regexp = Regexp.new(/\A\s*(LABELS;.*?#{ENDBLKSTR};)\s*/im)
149
156
  end
150
157
 
151
158
  class AssumptionsBlk < Token
152
- @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
159
+ @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?#{ENDBLKSTR};)\s*/im)
153
160
  end
154
161
 
155
162
  class CodonsBlk < Token
156
- @regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
163
+ @regexp = Regexp.new(/\A\s*(CODONS;.*?#{ENDBLKSTR};)\s*/im)
157
164
  end
158
165
 
159
166
  class MesquiteBlk < Token
160
- @regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
167
+ @regexp = Regexp.new(/\A\s*(Mesquite;.*?#{ENDBLKSTR};)\s*/im)
161
168
  end
162
169
 
163
170
  class BlkEnd < Token
164
- @regexp = Regexp.new(/\A[\s\n]*(END;)\s*/i)
171
+ @regexp = Regexp.new(/\A[\s]*(#{ENDBLKSTR};)\s*/i)
165
172
  end
166
173
 
167
174
  class LBracket < Token
@@ -173,13 +180,13 @@ module NexusParser::Tokens
173
180
  end
174
181
 
175
182
  class LParen < Token
176
- @regexp = Regexp.new('\A\s*(\()\s*')
183
+ @regexp = Regexp.new('\A\s*(\()\s*')
177
184
  end
178
185
 
179
186
  class RParen < Token
180
187
  @regexp = Regexp.new('\A\s*(\))\s*')
181
188
  end
182
-
189
+
183
190
  class Equals < Token
184
191
  @regexp = Regexp.new('\A\s*(=)\s*')
185
192
  end
@@ -192,7 +199,7 @@ module NexusParser::Tokens
192
199
  class ID < Token
193
200
  @regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
194
201
  def initialize(str)
195
- str.strip!
202
+ str.strip!
196
203
  str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
197
204
  @value = str
198
205
  end
@@ -241,11 +248,12 @@ module NexusParser::Tokens
241
248
  NexusParser::Tokens::LabelsBlk,
242
249
  NexusParser::Tokens::TaxaBlk,
243
250
  NexusParser::Tokens::NotesBlk,
244
- NexusParser::Tokens::Title,
251
+ NexusParser::Tokens::Title,
245
252
  NexusParser::Tokens::Taxlabels,
246
253
  NexusParser::Tokens::Dimensions,
247
254
  NexusParser::Tokens::FileLbl,
248
255
  NexusParser::Tokens::Format,
256
+ NexusParser::Tokens::RespectCase,
249
257
  NexusParser::Tokens::Equals,
250
258
  NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, it's a lesser Label)
251
259
  NexusParser::Tokens::CharStateLabels,
@@ -263,12 +271,12 @@ module NexusParser::Tokens
263
271
  NexusParser::Tokens::RParen,
264
272
  NexusParser::Tokens::LBracket,
265
273
  NexusParser::Tokens::RBracket,
266
- NexusParser::Tokens::Label, # must be before RowVec
274
+ NexusParser::Tokens::Label, # must be before RowVec
267
275
  NexusParser::Tokens::RowVec,
268
276
  NexusParser::Tokens::LinkLine,
269
277
  NexusParser::Tokens::ID # need to trash this
270
- ]
278
+ ]
271
279
  end
272
-
280
+
273
281
  end
274
282
 
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NexusParser
4
+ VERSION = "1.2.1"
5
+ end