nexus_parser 1.1.4 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: be7f8d6dc4a222f456df1bb18dc3d63182cfb83b88ee036c227a93883c5ff70a
4
+ data.tar.gz: 1ab8785c3ca791476efe19d290ef25f20dc790792cf82fdee4ab1a0cd7468347
5
+ SHA512:
6
+ metadata.gz: e2c206656a9c8a1760e158641923b47f789ef42156fd9486dd75f70f92db670f31308adf739355eca6192ae3c702f6868b04fcab8ab87e7e44590371b3838bf7
7
+ data.tar.gz: fb4a0f18b0430dc04aa4feebd9d9ea46fb91b8557bc61efe7d75d4ad4427da1a0fa9f0632a3afe074fe5e47c17b18f9cd6a58786833b6c07a7181f819cece0d8
data/.gitignore CHANGED
@@ -13,9 +13,21 @@ tmtags
13
13
  ## VIM
14
14
  *.swp
15
15
 
16
+ ## BYEBUG
17
+ .byebug_history
18
+
16
19
  ## PROJECT::GENERAL
17
20
  coverage
18
21
  rdoc
19
22
  pkg
20
23
 
24
+ /.bundle/
25
+ /.yardoc
26
+ /_yardoc/
27
+ /coverage/
28
+ /doc/
29
+ /pkg/
30
+ /spec/reports/
31
+ /tmp/
32
+
21
33
  ## PROJECT::SPECIFIC
data/LICENSE CHANGED
@@ -1,20 +1,28 @@
1
- Copyright (c) 2009 mjy
1
+ Copyright (c) 2008- Matt Yoder. All rights reserved.
2
2
 
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
3
+ Developed by: Matt Yoder, Species File Group, and Collaborators
4
+ University of Illinois
5
+ https://speciesfilegroup.org
10
6
 
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
8
+ this software and associated documentation files (the "Software"), to deal with
9
+ the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11
+ of the Software, and to permit persons to whom the Software is furnished to
12
+ do so, subject to the following conditions:
13
+ * Redistributions of source code must retain the above copyright notice,
14
+ this list of conditions and the following disclaimers.
15
+ * Redistributions in binary form must reproduce the above copyright notice,
16
+ this list of conditions and the following disclaimers in the documentation
17
+ and/or other materials provided with the distribution.
18
+ * Neither the names of <NAME OF DEVELOPMENT GROUP>, <NAME OF INSTITUTION>,
19
+ nor the names of its contributors may be used to endorse or promote products
20
+ derived from this Software without specific prior written permission.
13
21
 
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
28
+ SOFTWARE.
@@ -1,8 +1,10 @@
1
- = nexus_parser
1
+ # nexus_parser
2
+
3
+ A Ruby lexer/parser for [nexus](https://en.wikipedia.org/wiki/Nexus_file) files, as used in phylogenetic analysis in taxonomy.
2
4
 
3
5
  See the test files for usage for now, lots of examples there.
4
6
 
5
- == Note on Patches/Pull Requests
7
+ ## Note on Patches/Pull Requests
6
8
 
7
9
  * Fork the project.
8
10
  * Make your feature addition or bug fix.
@@ -10,8 +12,8 @@ See the test files for usage for now, lots of examples there.
10
12
  future version unintentionally.
11
13
  * Commit, do not mess with rakefile, version, or history.
12
14
  (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
- * Send me a pull request. Bonus points for topic branches.
15
+ * Make a pull request.
14
16
 
15
- == Copyright
17
+ ## License
16
18
 
17
- Copyright (c) 2010 Matt Yoder. See LICENSE for details.
19
+ `nexus_parser` is open source and is now available under the [University of Illinois/NCSA Open Source License](https://en.wikipedia.org/wiki/University_of_Illinois/NCSA_Open_Source_License).
data/Rakefile CHANGED
@@ -1,23 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
1
4
  require 'rubygems'
2
5
  require 'rake'
3
6
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "nexus_parser"
8
- gem.summary = %Q{A Nexus file format (phylogenetic inference) parser in Ruby.}
9
- gem.description = %Q{A full featured and extensible Nexus file parser in Ruby. }
10
- gem.email = "diapriid@gmail.com"
11
- gem.homepage = "http://github.com/mjy/nexus_parser"
12
- gem.authors = ["mjy"]
13
- # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
- end
20
-
21
7
  require 'rake/testtask'
22
8
  Rake::TestTask.new(:test) do |test|
23
9
  test.libs << 'lib' << 'test'
@@ -38,13 +24,11 @@ rescue LoadError
38
24
  end
39
25
  end
40
26
 
41
- task :test => :check_dependencies
42
-
43
27
  task :default => :test
44
28
 
45
- require 'rake/rdoctask'
29
+ require 'rdoc/task'
46
30
  Rake::RDocTask.new do |rdoc|
47
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
31
+ version = NexusParser::VERSION
48
32
 
49
33
  rdoc.rdoc_dir = 'rdoc'
50
34
  rdoc.title = "nexus_parser #{version}"
@@ -8,8 +8,8 @@ class NexusParser::Lexer
8
8
  @input.gsub!(/\x0D/,"") # get rid of possible DOS carriage returns
9
9
  @next_token = nil
10
10
  end
11
-
12
- # checks whether the next token is of the specified class.
11
+
12
+ # checks whether the next token is of the specified class.
13
13
  def peek(token_class)
14
14
  token = read_next_token(token_class)
15
15
  return token.class == token_class
@@ -21,18 +21,18 @@ class NexusParser::Lexer
21
21
  token = read_next_token(token_class)
22
22
  @next_token = nil
23
23
  if token.class != token_class
24
- raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
24
+ raise(NexusParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..40]}...", caller)
25
25
  else
26
- return token
26
+ return token
27
27
  end
28
28
  end
29
-
29
+
30
30
  private
31
- # read (and store) the next token from the input, if it has not already been read.
32
- def read_next_token(token_class)
33
- if @next_token
34
- return @next_token
35
- else
31
+ # read (and store) the next token from the input, if it has not already been read.
32
+ def read_next_token(token_class)
33
+ if @next_token
34
+ return @next_token
35
+ else
36
36
  # check for a match on the specified class first
37
37
  if match(token_class)
38
38
  return @next_token
@@ -42,19 +42,19 @@ class NexusParser::Lexer
42
42
  return @next_token if match(t)
43
43
  }
44
44
  end
45
- # no match, either end of string or lex-error
46
- if @input != ''
47
- raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
48
- else
49
- return nil
45
+ # no match, either end of string or lex-error
46
+ if @input != ''
47
+ raise( NexusParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
48
+ else
49
+ return nil
50
50
  end
51
51
  end
52
52
  end
53
-
53
+
54
54
  def match(token_class)
55
55
  if (m = token_class.regexp.match(@input))
56
56
  @next_token = token_class.new(m[1])
57
- @input = @input[m.end(0)..-1]
57
+ @input = @input[m.end(0)..-1]
58
58
  return true
59
59
  else
60
60
  return false
@@ -8,7 +8,7 @@ class NexusParser::Parser
8
8
 
9
9
  def parse_file
10
10
  # nf = @builder.new_nexus_file # create new local NexusParser instance, nf
11
- blks = []
11
+ # blks = []
12
12
  @lexer.pop(NexusParser::Tokens::NexusStart)
13
13
 
14
14
  while @lexer.peek(NexusParser::Tokens::BeginBlk)
@@ -151,8 +151,10 @@ class NexusParser::Parser
151
151
  # probably pop header then fuse with parse_dimensions
152
152
  def parse_format
153
153
  @lexer.pop(NexusParser::Tokens::Format)
154
- while @lexer.peek(NexusParser::Tokens::ValuePair)
155
- @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
154
+
155
+ while @lexer.peek(NexusParser::Tokens::ValuePair) || @lexer.peek(NexusParser::Tokens::RespectCase)
156
+ @lexer.pop(NexusParser::Tokens::RespectCase) if @lexer.peek(NexusParser::Tokens::RespectCase) # !! TODO: nothing is set, respect case is ignored
157
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value) if @lexer.peek(NexusParser::Tokens::ValuePair)
156
158
  end
157
159
 
158
160
  check_initialization_of_ntax_nchar
@@ -215,7 +217,7 @@ class NexusParser::Parser
215
217
 
216
218
  opts.update({:index => (index - 1), :name => name})
217
219
 
218
- raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
220
+ raise(NexusParser::ParseError, "Error parsing character state labels for (or around) character #{index - 1}.") if !opts[:name]
219
221
  @builder.update_chr(opts)
220
222
  end
221
223
 
@@ -1,9 +1,11 @@
1
1
  module NexusParser::Tokens
2
2
 
3
- class Token
3
+ ENDBLKSTR = '(end|endblock)'.freeze
4
+
5
+ class Token
4
6
  # this allows access to the class attribute regexp, without using a class variable
5
7
  class << self; attr_reader :regexp; end
6
- attr_reader :value
8
+ attr_reader :value
7
9
  def initialize(str)
8
10
  @value = str
9
11
  end
@@ -11,7 +13,7 @@ module NexusParser::Tokens
11
13
 
12
14
  # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
13
15
  # moving along popping off
14
-
16
+
15
17
  class NexusStart < Token
16
18
  @regexp = Regexp.new(/\A.*(\#nexus)\s*/i)
17
19
  end
@@ -21,7 +23,7 @@ module NexusParser::Tokens
21
23
  # @regexp = Regexp.new(/\A\s*(\[[^\]]*\])\s*/i)
22
24
  # def initialize(str)
23
25
  # str = str[1..-2] # strip the []
24
- # str.strip!
26
+ # str.strip!
25
27
  # @value = str
26
28
  # end
27
29
  # end
@@ -31,20 +33,20 @@ module NexusParser::Tokens
31
33
  end
32
34
 
33
35
  class EndBlk < Token
34
- @regexp = Regexp.new(/\A\s*([\s\n]*End[\s\n]*;[\s\n]*)/i)
36
+ @regexp = Regexp.new(/\A\s*([\s]*#{ENDBLKSTR}[\s]*;[\s]*)/i)
35
37
  end
36
38
 
37
- # label
39
+ # label
38
40
  class AuthorsBlk < Token
39
- @regexp = Regexp.new(/\A\s*(Authors;.*?END;)\s*/im)
41
+ @regexp = Regexp.new(/\A\s*(Authors;.*?#{ENDBLKSTR};)\s*/im)
40
42
  end
41
43
 
42
- # label
44
+ # label
43
45
  class TaxaBlk < Token
44
46
  @regexp = Regexp.new(/\A\s*(\s*Taxa\s*;)\s*/i)
45
47
  end
46
48
 
47
- # label
49
+ # label
48
50
  class NotesBlk < Token
49
51
  @regexp = Regexp.new(/\A\s*(\s*Notes\s*;)\s*/i)
50
52
  end
@@ -66,9 +68,14 @@ module NexusParser::Tokens
66
68
  @regexp = Regexp.new(/\A\s*(format)\s*/i)
67
69
  end
68
70
 
69
- # label
71
+ # TODO: Handled, but ignored
72
+ class RespectCase < Token
73
+ @regexp = Regexp.new(/\A\s*(respectcase)\s*/i)
74
+ end
75
+
76
+ # label
70
77
  class Taxlabels < Token
71
- @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
78
+ @regexp = Regexp.new(/\A\s*(\s*taxlabels\s*)\s*/i)
72
79
  end
73
80
 
74
81
  # same as ID
@@ -77,8 +84,8 @@ module NexusParser::Tokens
77
84
  def initialize(str)
78
85
  str.strip!
79
86
  str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
80
- str = str[1..-2] if str[0..0] == '"'
81
- str.strip!
87
+ str = str[1..-2] if str[0..0] == '"'
88
+ str.strip!
82
89
  @value = str
83
90
  end
84
91
  end
@@ -91,15 +98,15 @@ module NexusParser::Tokens
91
98
  @regexp = Regexp.new(/\A\s*(link.*\s*;)\s*\n*/i)
92
99
  end
93
100
 
94
- # note we grab EOL and ; here
101
+ # note we grab EOL and ; here
95
102
  class ValuePair < Token
96
- @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
103
+ @regexp = Regexp.new(/\A\s*([\w]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s;]+)))[\s;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
97
104
  def initialize(str)
98
105
  str.strip!
99
106
  str = str.split(/=/)
100
107
  str[1].strip!
101
- str[1] = str[1][1..-2] if str[1][0..0] == "'"
102
- str[1] = str[1][1..-2] if str[1][0..0] == "\""
108
+ str[1] = str[1][1..-2] if str[1][0..0] == "'"
109
+ str[1] = str[1][1..-2] if str[1][0..0] == "\""
103
110
  @value = {str[0].strip.downcase.to_sym => str[1].strip}
104
111
  end
105
112
  end
@@ -110,10 +117,10 @@ module NexusParser::Tokens
110
117
 
111
118
  class RowVec < Token
112
119
  @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
113
- def initialize(str)
114
- # meh! Ruby is simpler to read than Perl?
115
- # handles both () and {} style multistates
116
- s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
120
+ def initialize(str)
121
+ # meh! Ruby is simpler to read than Perl?
122
+ # handles both () and {} style multistates
123
+ s = str.split(/\(|\)|\}|\{/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
117
124
  @value = s
118
125
  end
119
126
  end
@@ -131,37 +138,37 @@ module NexusParser::Tokens
131
138
  end
132
139
 
133
140
  # unparsed blocks
134
-
141
+
135
142
  class TreesBlk < Token
136
- @regexp = Regexp.new(/\A\s*(trees;.*?END;)\s*/im) # note the multi-line /m
143
+ @regexp = Regexp.new(/\A\s*(trees;.*?#{ENDBLKSTR};)\s*/im) # note the multi-line /m
137
144
  end
138
145
 
139
146
  class SetsBlk < Token
140
- @regexp = Regexp.new(/\A\s*(sets;.*?END;)\s*/im)
147
+ @regexp = Regexp.new(/\A\s*(sets;.*?#{ENDBLKSTR};)\s*/im)
141
148
  end
142
149
 
143
150
  class MqCharModelsBlk < Token
144
- @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?END;)\s*/im)
151
+ @regexp = Regexp.new(/\A\s*(MESQUITECHARMODELS;.*?#{ENDBLKSTR};)\s*/im)
145
152
  end
146
153
 
147
154
  class LabelsBlk < Token
148
- @regexp = Regexp.new(/\A\s*(LABELS;.*?END;)\s*/im)
155
+ @regexp = Regexp.new(/\A\s*(LABELS;.*?#{ENDBLKSTR};)\s*/im)
149
156
  end
150
157
 
151
158
  class AssumptionsBlk < Token
152
- @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?END;)\s*/im)
159
+ @regexp = Regexp.new(/\A\s*(ASSUMPTIONS;.*?#{ENDBLKSTR};)\s*/im)
153
160
  end
154
161
 
155
162
  class CodonsBlk < Token
156
- @regexp = Regexp.new(/\A\s*(CODONS;.*?END;)\s*/im)
163
+ @regexp = Regexp.new(/\A\s*(CODONS;.*?#{ENDBLKSTR};)\s*/im)
157
164
  end
158
165
 
159
166
  class MesquiteBlk < Token
160
- @regexp = Regexp.new(/\A\s*(Mesquite;.*?END;)\s*/im)
167
+ @regexp = Regexp.new(/\A\s*(Mesquite;.*?#{ENDBLKSTR};)\s*/im)
161
168
  end
162
169
 
163
170
  class BlkEnd < Token
164
- @regexp = Regexp.new(/\A[\s\n]*(END;)\s*/i)
171
+ @regexp = Regexp.new(/\A[\s]*(#{ENDBLKSTR};)\s*/i)
165
172
  end
166
173
 
167
174
  class LBracket < Token
@@ -173,13 +180,13 @@ module NexusParser::Tokens
173
180
  end
174
181
 
175
182
  class LParen < Token
176
- @regexp = Regexp.new('\A\s*(\()\s*')
183
+ @regexp = Regexp.new('\A\s*(\()\s*')
177
184
  end
178
185
 
179
186
  class RParen < Token
180
187
  @regexp = Regexp.new('\A\s*(\))\s*')
181
188
  end
182
-
189
+
183
190
  class Equals < Token
184
191
  @regexp = Regexp.new('\A\s*(=)\s*')
185
192
  end
@@ -192,7 +199,7 @@ module NexusParser::Tokens
192
199
  class ID < Token
193
200
  @regexp = Regexp.new('\A\s*((\'[^\']+\')|(\w[^,:(); \t\n]*|_)+)\s*')
194
201
  def initialize(str)
195
- str.strip!
202
+ str.strip!
196
203
  str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
197
204
  @value = str
198
205
  end
@@ -241,11 +248,12 @@ module NexusParser::Tokens
241
248
  NexusParser::Tokens::LabelsBlk,
242
249
  NexusParser::Tokens::TaxaBlk,
243
250
  NexusParser::Tokens::NotesBlk,
244
- NexusParser::Tokens::Title,
251
+ NexusParser::Tokens::Title,
245
252
  NexusParser::Tokens::Taxlabels,
246
253
  NexusParser::Tokens::Dimensions,
247
254
  NexusParser::Tokens::FileLbl,
248
255
  NexusParser::Tokens::Format,
256
+ NexusParser::Tokens::RespectCase,
249
257
  NexusParser::Tokens::Equals,
250
258
  NexusParser::Tokens::ValuePair, # this has bad overlap with Label and likely IDs (need to kill the latter, it's a lesser Label)
251
259
  NexusParser::Tokens::CharStateLabels,
@@ -263,12 +271,12 @@ module NexusParser::Tokens
263
271
  NexusParser::Tokens::RParen,
264
272
  NexusParser::Tokens::LBracket,
265
273
  NexusParser::Tokens::RBracket,
266
- NexusParser::Tokens::Label, # must be before RowVec
274
+ NexusParser::Tokens::Label, # must be before RowVec
267
275
  NexusParser::Tokens::RowVec,
268
276
  NexusParser::Tokens::LinkLine,
269
277
  NexusParser::Tokens::ID # need to trash this
270
- ]
278
+ ]
271
279
  end
272
-
280
+
273
281
  end
274
282
 
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NexusParser
4
+ VERSION = "1.2.1"
5
+ end