RubyGems - obo_parser - Versions diffs - 0.1.0 - Mend

obo_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

data/.document +5 -0
data/.gitignore +21 -0
data/LICENSE +20 -0
data/README +13 -0
data/README.rdoc +17 -0
data/Rakefile +53 -0
data/VERSION +1 -0
data/init.rb +1 -0
data/install.rb +1 -0
data/lib/lexer.rb +59 -0
data/lib/obo_file.rb +106 -0
data/lib/parser.rb +53 -0
data/lib/tokens.rb +160 -0
data/obo_parser.gemspec +61 -0
data/tasks/obo_parser_tasks.rake +4 -0
data/test/cell.obo +5867 -0
data/test/obo_1.0_test.txt +12582 -0
data/test/obo_1.0_test_wo_typedefs.txt +12561 -0
data/test/test_obo_parser.rb +109 -0
data/uninstall.rb +1 -0
metadata +83 -0

data/.document ADDED Viewed

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+## MAC OS
+.DS_Store
+## TEXTMATE
+*.tmproj
+tmtags
+## EMACS
+*~
+\#*
+.\#*
+## VIM
+*.swp
+## PROJECT::GENERAL
+coverage
+rdoc
+pkg
+## PROJECT::SPECIFIC

data/LICENSE ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2009 mjy
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README ADDED Viewed

@@ -0,0 +1,13 @@
+obo_parser
+==========
+Introduction goes here.
+Example
+=======
+Example goes here.
+Copyright (c) 2008 Matt Yoder, released under the MIT license

data/README.rdoc ADDED Viewed

@@ -0,0 +1,17 @@
+= obo_parser
+A simple OBO file format parsing library.
+== Note on Patches/Pull Requests
+* Fork the project.
+* Make your feature addition or bug fix.
+* Add tests for it. This is important so I don't break it in a
+  future version unintentionally.
+* Commit, but do not mess with the Rakefile, version, or history.
+  (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
+* Send me a pull request. Bonus points for topic branches.
+== Copyright
+Copyright (c) 2010 Matt Yoder. See LICENSE for details.

data/Rakefile ADDED Viewed

@@ -0,0 +1,53 @@
+require 'rubygems'
+require 'rake'
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "obo_parser"
+    gem.summary = %Q{A simple OBO file handler.}
+    gem.description = %Q{Provides all-in-one object containing the contents of an OBO formatted file.  OBO version 1.2 is targeted, though this should work for 1.0. }
+    gem.email = "diapriid@gmail.com"
+    gem.homepage = "http://github.com/mjy/obo_parser"
+    gem.authors = ["mjy"]
+    # gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |test|
+    test.libs << 'test'
+    test.pattern = 'test/**/test_*.rb'
+    test.verbose = true
+  end
+rescue LoadError
+  task :rcov do
+    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  end
+end
+task :test => :check_dependencies
+task :default => :test
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "obo_parser #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/init.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Include hook code here

data/install.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Install hook code here

data/lib/lexer.rb ADDED Viewed

@@ -0,0 +1,59 @@
+class OboFile::Lexer
+  attr_reader :input
+  def initialize(input)
+    @input = input
+    @next_token = nil
+  end
+  # checks whether the next token is of the specified class.
+  def peek(token_class)
+    token = read_next_token(token_class)
+    return token.class == token_class
+  end
+  # return (and delete) the next token from the input stream, or raise an exception
+  # if the next token is not of the given class.
+  def pop(token_class)
+    token = read_next_token(token_class)
+    @next_token = nil
+    if token.class != token_class
+      raise(OboFile::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
+    else
+      return token
+    end
+  end
+  private
+   # read (and store) the next token from the input, if it has not already been read.
+   def read_next_token(token_class)
+      if @next_token
+        return @next_token
+      else
+      # check for a match on the specified class first
+      if match(token_class)
+        return @next_token
+      else
+        # now check all the tokens for a match
+        OboFile::Tokens.obo_file_token_list.each {|t|
+          return @next_token if match(t)
+        }
+      end
+       # no match, either end of string or lex-error
+       if @input != ''
+           raise(OboFile::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
+       else
+        return nil
+      end
+    end
+  end
+  def match(token_class)
+    if (m = token_class.regexp.match(@input))
+      @next_token = token_class.new(m[1])
+       @input = @input[m.end(0)..-1]
+      return true
+    else
+      return false
+    end
+  end
+end

data/lib/obo_file.rb ADDED Viewed

@@ -0,0 +1,106 @@
+# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
+# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
+# outstanding issues:
+module OboFile
+require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
+require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
+class OboFile # Node
+  attr_accessor :terms, :typedefs
+  def initialize
+    @terms = []
+    @typedefs = []
+  end
+  def term_strings
+    @terms.collect{|t| t.name}.sort
+  end
+  def term_hash
+    @terms.inject({}) {|sum, t| sum.update(t.name => t.id)}
+  end
+  class Stanza
+    attr_accessor :name, :id, :tags
+    # we can have only one of id, name, and some others (but this is a loose setup now)
+    # can have many of some other things- put them in tags
+    def initialize(tags)
+      @tags = {}
+      tags.each do |t|
+        case t[0]
+        when 'id'
+          @id = t[1]
+        when 'name'
+          @name = t[1]
+        else
+          @tags[t[0]] = [] if !@tags[t[0]]
+          @tags[t[0]].push t[1]
+        end
+      end
+    end
+  end
+  class Term < Stanza
+    attr_accessor :def
+    def initialize(tags)
+      super
+    end
+  end
+  class Typedef < Stanza
+    def initialize(tags)
+      super
+    end
+  end
+end
+class OboFileBuilder
+  def initialize
+    @of =  OboFile.new
+  end
+  def add_term(tags)
+    @of.terms.push OboFile::Term.new(tags)
+  end
+  def add_typedef(tags)
+    @of.typedefs.push OboFile::Typedef.new(tags)
+  end
+  def obo_file
+    @of
+  end
+end
+class ParseError < StandardError
+end
+end # end module
+# the actual method
+def parse_obo_file(input)
+  @input = input
+   raise(OboFile::ParseError, "Nothing passed to parse!") if  !@input ||  @input.size == 0
+  @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n")  # strip out comments - this is a kludge, likely needs fixing!!
+  builder = OboFile::OboFileBuilder.new
+  lexer = OboFile::Lexer.new(@input)
+  OboFile::Parser.new(lexer, builder).parse_file
+  return builder.obo_file
+end

data/lib/parser.rb ADDED Viewed

@@ -0,0 +1,53 @@
+class OboFile::Parser
+  def initialize(lexer, builder)
+    @lexer = lexer
+    @builder = builder
+  end
+  def parse_file
+    # toss everything right now, we just want the terms
+    while !@lexer.peek(OboFile::Tokens::Term)
+      @lexer.pop(OboFile::Tokens::TagValuePair)
+    end
+    i = 0
+    while !@lexer.peek(OboFile::Tokens::Typedef) && !@lexer.peek(OboFile::Tokens::EndOfFile)
+      raise OboFile::ParseError, "infinite loop in Terms" if i > 10000000
+      parse_term
+      i += 1
+    end
+    i = 0
+    while @lexer.peek(OboFile::Tokens::Typedef)
+      raise OboFile::ParseError,"infinite loop in Terms" if i > 1000000 # there aren't that many words!
+      parse_typedef
+      i += 1
+    end
+  end
+  def parse_term
+    t = @lexer.pop(OboFile::Tokens::Term)
+    tags = []
+    while !@lexer.peek(OboFile::Tokens::Term) && !@lexer.peek(OboFile::Tokens::Typedef) && !@lexer.peek(OboFile::Tokens::EndOfFile)
+      if @lexer.peek(OboFile::Tokens::TagValuePair)
+        t = @lexer.pop(OboFile::Tokens::TagValuePair)
+        tags.push [t.tag, t.value]
+      else
+        raise(OboFile::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{t.tag} / #{t.value}]")
+      end
+    end
+    @builder.add_term(tags)
+  end
+  def parse_typedef
+    @lexer.pop(OboFile::Tokens::Typedef)
+    #  @t = @builder.stub_typdef
+    tags = []
+    while !@lexer.peek(OboFile::Tokens::Typedef) && @lexer.peek(OboFile::Tokens::TagValuePair)
+      t = @lexer.pop(OboFile::Tokens::TagValuePair)
+      tags.push [t.tag, t.value]
+    end
+    @builder.add_typedef(tags)
+  end
+end

data/lib/tokens.rb ADDED Viewed

@@ -0,0 +1,160 @@
+module OboFile::Tokens
+  class Token
+    # this allows access the the class attribute regexp, without using a class variable
+    class << self; attr_reader :regexp; end
+    attr_reader :value
+    def initialize(str)
+      @value = str
+    end
+  end
+  # in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
+  # moving along popping off
+  class Term < Token
+    @regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
+  end
+  class Typedef < Token
+    @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
+  end
+  class TagValuePair < Token
+    attr_reader :tag, :value
+    @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i) #  returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+    def initialize(str)
+      str.strip!
+      str = str.split(':',2)
+      str[1].strip!
+      # strip trailing comments
+      @tag = str[0]
+      @value = str[1]
+    end
+  end
+  class NameValuePair < Token
+    @regexp = Regexp.new('fail')
+  end
+  class Dbxref < Token
+    @regexp = Regexp.new('fail')
+  end
+  # same as ID
+  class Label < Token
+    @regexp = Regexp.new('\A\s*((\'+[^\']+\'+)|(\"+[^\"]+\"+)|(\w[^,:(); \t\n]*|_)+)\s*') #  matches "foo and stuff", foo, 'stuff or foo', '''foo''', """bar""" BUT NOT ""foo" "
+    def initialize(str)
+      str.strip!
+      str = str[1..-2] if str[0..0] == "'" # get rid of quote marks
+      str = str[1..-2] if str[0..0] == '"'
+      str.strip!
+      @value = str
+    end
+  end
+  # note we grab EOL and ; here
+  class ValuePair < Token
+    @regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) #  returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+    def initialize(str)
+      str.strip!
+      str = str.split(/=/)
+      str[1].strip!
+      str[1] = str[1][1..-2] if str[1][0..0] == "'"
+      str[1] = str[1][1..-2] if str[1][0..0] ==  "\""
+      @value = {str[0].strip.downcase.to_sym => str[1].strip}
+    end
+  end
+  class Matrix < Token
+    @regexp = Regexp.new(/\A\s*(matrix)\s*/i)
+  end
+  class RowVec < Token
+    @regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
+     def initialize(str)
+      s = str.split(/\(|\)/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
+      @value = s
+    end
+  end
+  ## punctuation
+  class LBracket < Token
+    @regexp = Regexp.new('\A\s*(\[)\s*')
+  end
+  class RBracket < Token
+    @regexp = Regexp.new('\A\s*(\])\s*')
+  end
+  class LParen < Token
+      @regexp = Regexp.new('\A\s*(\()\s*')
+  end
+  class RParen < Token
+    @regexp = Regexp.new('\A\s*(\))\s*')
+  end
+  class Equals < Token
+    @regexp = Regexp.new('\A\s*(=)\s*')
+  end
+  class BckSlash < Token
+    @regexp = Regexp.new('\A\s*(\/)\s*')
+  end
+  class Colon < Token
+    @regexp = Regexp.new('\A\s*(:)\s*')
+  end
+  class SemiColon < Token
+    @regexp = Regexp.new('\A\s*(;)\s*')
+  end
+  class Comma < Token
+    @regexp = Regexp.new('\A\s*(\,)\s*')
+  end
+  class EndOfFile < Token
+    @regexp = Regexp.new('\A(\s*\n*)\Z')
+  end
+  class Number < Token
+    @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
+    def initialize(str)
+      # a little oddness here, in some case we don't want to include the .0
+      # see issues with numbers as labels
+      if str =~ /\./
+        @value = str.to_f
+      else
+        @value = str.to_i
+      end
+    end
+  end
+  # Tokens::NexusComment
+  # this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
+  def self.obo_file_token_list
+    [
+      OboFile::Tokens::Term,
+      OboFile::Tokens::Typedef,
+      OboFile::Tokens::TagValuePair,
+      OboFile::Tokens::NameValuePair,  # not implemented
+      OboFile::Tokens::Dbxref,         # not implemented
+      OboFile::Tokens::LBracket,
+      OboFile::Tokens::EndOfFile
+    ]
+  end
+end

data/obo_parser.gemspec ADDED Viewed

@@ -0,0 +1,61 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{obo_parser}
+  s.version = "0.1.0"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["mjy"]
+  s.date = %q{2010-03-17}
+  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file.  OBO version 1.2 is targeted, though this should work for 1.0. }
+  s.email = %q{diapriid@gmail.com}
+  s.extra_rdoc_files = [
+    "LICENSE",
+     "README",
+     "README.rdoc"
+  ]
+  s.files = [
+    ".document",
+     ".gitignore",
+     "LICENSE",
+     "README",
+     "README.rdoc",
+     "Rakefile",
+     "VERSION",
+     "init.rb",
+     "install.rb",
+     "lib/lexer.rb",
+     "lib/obo_file.rb",
+     "lib/parser.rb",
+     "lib/tokens.rb",
+     "obo_parser.gemspec",
+     "tasks/obo_parser_tasks.rake",
+     "test/cell.obo",
+     "test/obo_1.0_test.txt",
+     "test/obo_1.0_test_wo_typedefs.txt",
+     "test/test_obo_parser.rb",
+     "uninstall.rb"
+  ]
+  s.homepage = %q{http://github.com/mjy/obo_parser}
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.require_paths = ["lib"]
+  s.rubygems_version = %q{1.3.6}
+  s.summary = %q{A simple OBO file handler.}
+  s.test_files = [
+    "test/test_obo_parser.rb"
+  ]
+  if s.respond_to? :specification_version then # only set the spec version where the attribute exists
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION # assigned but not used below
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then # both branches are empty: no dependencies are declared. NOTE(review): Gem::RubyGemsVersion may be undefined on current RubyGems (Gem::VERSION) - confirm
+    else
+    end
+  else
+  end
+end

data/tasks/obo_parser_tasks.rake ADDED Viewed

@@ -0,0 +1,4 @@
+# desc "Explaining what the task does"
+# task :foo_task do
+#   # Task goes here
+# end