RubyGems - obo_parser - Versions diffs - 0.3.3 → 0.3.4 - Mend

obo_parser 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

data/README.rdoc CHANGED Viewed

@@ -30,14 +30,18 @@ A simple Ruby gem for parsing OBO 1.2 formatted ontology files.  Useful for repo
     first_typdef.id.value                               # => 'Some typedef id'
     first_typdef.name.value                             # => 'Some typedef name'
-    foo.terms.first.tags_named('is_a')                  # => [OboParser#Tag, ... ]
-    foo.terms.first.tags_named('is_a').first.tag        # => 'is_a'
-    foo.terms.first.tags_named('is_a').first.value      # => 'Some Term id'
+    foo.terms.first.tags_named('synonym')               # => [OboParser#Tag, ... ]
+    foo.terms.first.tags_named('synonym').first.tag     # => 'synonym'
+    foo.terms.first.tags_named('synonym').first.value   # => 'Some label'
+    foo.terms.first.relationships                       # => [['relation_ship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
 See also /test/test_obo_parser.rb
 == Utilties
+!! UTILTIES ARE PRESENTLY BORKED !!
 A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb.  For example, shared labels across sets of ontologies can be found and returned.
 == Copyright

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.3.3
1	+ 0.3.4

data/lib/obo_parser.rb CHANGED Viewed

@@ -34,6 +34,12 @@ module OboParser
       @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
     end
+    # A single line in a Stanza within an OBO file
+    class Tag
+      attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
+    end
+    # A collection of single lines (Tags)
     class Stanza
       # Make special reference to several specific types of tags (:name, :id), subclasses will remove additional special typs from :other_tags
       attr_accessor :name, :id, :def, :other_tags
@@ -45,10 +51,11 @@ module OboParser
           t = tags.shift
           new_tag = OboParser::Tag.new
           new_tag.tag = t.tag
           new_tag.value = t.value
           new_tag.comment = t.comment
-          new_tag.xrefs = t.xrefs
+          new_tag.xrefs = t.xrefs
           case new_tag.tag
           when 'id'
@@ -58,6 +65,11 @@ module OboParser
           when 'def'
             @def = new_tag
           else
+            if new_tag.tag == 'relationship'
+              new_tag.related_term = t.related_term
+              new_tag.relation = t.relation
+            end
             @other_tags.push(new_tag)
           end
         end
@@ -78,21 +90,23 @@ module OboParser
     # TODO: likely deprecate and run with one model (Stanza)
     class Term < Stanza
-  #   attr_accessor :some_term_specific_def
+     attr_accessor :relationships
       def initialize(tags)
-        super
-  #     anonymous_tags = []
-  #     # Loop through "unclaimed" tags and reference those specific to Term
-  #     while @other_tags.size != 0
-  #       t = @other_tags.shift
-  #       case t.tag
-  #       when 'def'
-  #         @def = t
-  #       else
-  #         anonymous_tags.push(t)
-  #       end
-  #     end
-  #     @other_tags = anonymous_tags
+       super
+       @relationships = []
+       anonymous_tags = []
+       # Loop through "unclaimed" tags and reference those specific to Term
+       while @other_tags.size != 0
+         t = @other_tags.shift
+         case t.tag
+         when 'relationship'
+           @relationships.push([t.relation, t.related_term])
+         else
+           anonymous_tags.push(t)
+         end
+       end
+       @other_tags = anonymous_tags
       end
     end
@@ -114,10 +128,6 @@ module OboParser
       end
     end
-    class Tag
-      attr_accessor :tag, :value, :xrefs, :comment, :qualifier
-    end
   end
   class OboParserBuilder

data/lib/parser.rb CHANGED Viewed

@@ -6,23 +6,24 @@ class OboParser::Parser
   def parse_file
     # At present we ignore the header lines
-    while !@lexer.peek(OboParser::Tokens::Term)
+    while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef)
       @lexer.pop(OboParser::Tokens::TagValuePair)
     end
     i = 0
     while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
-      raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000 # there aren't that many words!
+      raise OboParser::ParseError, "infinite loop in Terms?" if i > 20000 # there aren't that many words!
       parse_term
       i += 1
     end
     i = 0
     while @lexer.peek(OboParser::Tokens::Typedef)
-      raise OboParser::ParseError,"infinite loop in Typedefs" if i > 1000000
+      raise OboParser::ParseError,"infinite loop in Typedefs?" if i > 20000
       parse_typedef
       i += 1
     end
   end
   def parse_term
@@ -30,8 +31,18 @@ class OboParser::Parser
     tags = []
     while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
       begin
-        t = @lexer.pop(OboParser::Tokens::TagValuePair)
+        if @lexer.peek(OboParser::Tokens::IsATag)
+          t = @lexer.pop(OboParser::Tokens::IsATag)
+        elsif @lexer.peek(OboParser::Tokens::DisjointFromTag)
+          t = @lexer.pop(OboParser::Tokens::DisjointFromTag)
+        elsif @lexer.peek(OboParser::Tokens::RelationshipTag)
+          t = @lexer.pop(OboParser::Tokens::RelationshipTag)
+        else
+          t = @lexer.pop(OboParser::Tokens::TagValuePair)
+        end
         tags.push(t)
       rescue
         raise
       end

data/lib/tokens.rb CHANGED Viewed

@@ -17,45 +17,101 @@ module OboParser::Tokens
     @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
   end
+  # Token eeds simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
+  # tokens for individual tags that don't conform to the pattern used for def: tags.
+  # The code can't presently handle escaped characters (like \,), as bizzarely found in some OBO files.
   class TagValuePair < Token
-    attr_reader :tag, :comment, :xrefs, :qualifier
+    attr_reader :tag, :comment, :xrefs, :qualifier, :description
     @regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
     def initialize(str)
       str.strip!
       tag, value = str.split(':',2)
       value.strip!
-      # Handle comments
-      if value =~ /(!\s*.+)\Z/i
+      if tag == 'comment'
+        @tag = tag.strip
+        @value = value.strip
+        return
+      end
+      @xrefs = []
+      # Handle inline comments
+      if value =~ /(\s+!\s*.+)\s*\n*\z/i
         @comment = $1
         value.gsub!(@comment, '')
-        @comment.gsub!(/\A!\s*/, '')
         @comment.strip!
+        @comment.gsub!(/\A!\s*/, '')
+      end
+      value.strip!
+      # Qualifier for the whole tag
+      if value =~ /(\{[^{]*?\})\s*\n*\z/
+        @qualifier = $1
+        value.gsub!(@qualifier, '')
+        @qualifier.strip!
       end
-      # Break out the xrefs, could be made made robust
-      # Assumes non-quoted comma delimited in format 'foo:bar, stuff:things'
-      if value =~ /(\s*\[.*\]\s*)/i
+      value.strip!
+      # Handle a xref list TODO: Tokenize
+      if value =~ /(\[.*\])/i
         xref_list = $1
         value.gsub!(xref_list, '')
         xref_list.strip!
-        xref_list = xref_list[1..-2] # strip []
-        @xrefs = xref_list.split(",")
+        xref_list = xref_list[1..-2] # [] off
+        qq = 0 # some failsafes
+        while xref_list.length > 0
+          qq += 1
+          raise "#{xref_list}" if qq > 500
+          xref_list.gsub!(/\A\s*,\s*/, '')
+          xref_list =~ /\A(.+?:[^\"|\{|\,]+)/i
+          v = $1
+          if !(v == "") && !v.nil?
+            v.strip!
+            r = Regexp.escape v
+            xref_list.gsub!(/\A#{r}\s*/, '')
+            @xrefs.push(v) if !v.nil?
+          end
+          xref_list.strip!
+          # A description
+          if xref_list =~ /\A(\s*".*?")/i
+            d = $1
+            r = Regexp.escape d
+            xref_list.gsub!(/\A#{r}/, '')
+            xref_list.strip!
+          end
+          # A optional modifier
+          if xref_list =~ /\A(\s*\{[^\}]*?\})/
+            m = $1
+            r = Regexp.escape m
+            xref_list.gsub!(/\A#{r}/, '')
+            xref_list.strip!
+          end
+          xref_list.strip!
+        end
       end
-      if value =~ /\A\"/
-        value =~ /(".*")/
-        @value = $1
-        value.gsub!(@value, '')
-        @qualifier = value.strip
+      value.strip!
+      # At this point we still might have a '"foo" QUALIFIER' combination
+      if value =~ /\A(\"[^\"]*\")\s+(.*)/
+        @value = $1.strip
+        @qualifier = $2.strip if !$2.nil?
       else
         @value = value.strip
-        @qualifier = nil
       end
-      @value = @value[1..-2].strip if @value[0..0] == "\"" # get rid of quote marks
-      @value = @value[1..-2].strip if @value[0..0] == "'"  # get rid of quote marks
+      @value = @value[1..-2].strip if @value[0..0] == "\""
       @tag = tag.strip
       @value.strip!
     end
@@ -73,6 +129,51 @@ module OboParser::Tokens
     end
   end
+  class RelationshipTag < Token
+    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
+    @regexp = Regexp.new(/\A\s*relationship:\s*(.+)\s*\n*/i) #  returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+    def initialize(str)
+      @tag = 'relationship'
+      @xrefs = []
+      @relation, @related_term = str.split(/\s/,3)
+      str =~ /\s+!\s+(.*)\s*\n*/i
+      @comment = $1
+      @comment ||= ""
+      [@relation, @related_term, @comment].map(&:strip!)
+    end
+  end
+  class IsATag < Token
+    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
+    @regexp = Regexp.new(/\A\s*is_a:\s*(.+)\s*\n*/i) #  returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+    def initialize(str)
+      @tag = 'relationship'
+      @relation = 'is_a'
+      @related_term, @comment = str.split(/\s/,2)
+      @comment ||= ""
+      @comment.gsub!(/\A!\s*/, '')
+      [@relation, @related_term, @comment].map(&:strip!)
+      @xrefs = []
+    end
+  end
+  class DisjointFromTag < Token
+    attr_reader :tag, :related_term, :relation, :comment, :xrefs #, :qualifier
+    @regexp = Regexp.new(/\A\s*disjoint_from:\s*(.+)\s*\n*/i) #  returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
+    def initialize(str)
+      @tag = 'relationship'
+      @relation = 'disjoint_from'
+      @related_term, @comment = str.split(/\s/,2)
+      @comment ||= ""
+      @comment.gsub!(/\A!\s*/, '')
+      [@relation, @related_term, @comment].map(&:strip!)
+      @xrefs = []
+    end
+  end
   class NameValuePair < Token
     @regexp = Regexp.new('fail')
   end
@@ -167,6 +268,9 @@ module OboParser::Tokens
       OboParser::Tokens::Term,
       OboParser::Tokens::Typedef,
       OboParser::Tokens::LBracket,
+      OboParser::Tokens::DisjointFromTag,
+      OboParser::Tokens::IsATag,
+      OboParser::Tokens::RelationshipTag,
       OboParser::Tokens::TagValuePair,
       OboParser::Tokens::XrefList,
       OboParser::Tokens::EndOfFile

data/obo_parser.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{obo_parser}
-  s.version = "0.3.3"
+  s.version = "0.3.4"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["mjy"]
-  s.date = %q{2011-04-07}
+  s.date = %q{2011-04-11}
   s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file.  OBO version 1.2 is targeted, though this should work for 1.0. }
   s.email = %q{diapriid@gmail.com}
   s.extra_rdoc_files = [

data/test/cell.obo CHANGED Viewed

@@ -2979,7 +2979,7 @@ is_a: CL:0000412 ! polyploid cell
 [Term]
 id: CL:0000418
 name: arcade cell
-def: "An epithelial cell found in C. elegans that firmly hold the outer body wall and the lips to the inner cylinder of the pharynx in a manner that keeps these organs from breaking apart, while still giving each organ freedom of movement during feeding." [GOC:tf\,, http://www.wormatlas.org/ver1/handbook/hypodermis/hypsupportother.htm#arcadecells]
+def: "An epithelial cell found in C. elegans that firmly hold the outer body wall and the lips to the inner cylinder of the pharynx in a manner that keeps these organs from breaking apart, while still giving each organ freedom of movement during feeding." [GOC:tf, http://www.wormatlas.org/ver1/handbook/hypodermis/hypsupportother.htm#arcadecells]
 is_a: CL:0000066 ! epithelial cell
 [Term]

data/test/test_obo_parser.rb CHANGED Viewed

@@ -25,18 +25,18 @@ class Test_Regex < Test::Unit::TestCase
 end
 class Test_Lexer < Test::Unit::TestCase
   def test_term
-     lexer = OboParser::Lexer.new("[Term]")
-     assert lexer.pop(OboParser::Tokens::Term)
+    lexer = OboParser::Lexer.new("[Term]")
+    assert lexer.pop(OboParser::Tokens::Term)
   end
   def test_end_of_file
-     lexer = OboParser::Lexer.new("    \n\n")
-     assert lexer.pop(OboParser::Tokens::EndOfFile)
-     lexer = OboParser::Lexer.new("\n")
-     assert lexer.pop(OboParser::Tokens::EndOfFile)
+    lexer = OboParser::Lexer.new("    \n\n")
+    assert lexer.pop(OboParser::Tokens::EndOfFile)
+    lexer = OboParser::Lexer.new("\n")
+    assert lexer.pop(OboParser::Tokens::EndOfFile)
   end
   def test_parse_term_stanza
@@ -69,8 +69,18 @@ class Test_Lexer < Test::Unit::TestCase
     assert_equal 'PATO:0001301', t.value
   end
+  def test_typdef
+     input = '[Typedef]
+     id: part_of
+     name: part of
+     is_transitive: true'
+     assert foo = parse_obo_file(input)
+     assert_equal 1, foo.typedefs.size
+     assert_equal 'part_of', foo.typedefs.first.id.value
+  end
   def test_parse_term_stanza2
-   input = '[Term]
+    input = '[Term]
       id: CL:0000009
       name: fusiform initial
       alt_id: CL:0000274
@@ -85,23 +95,62 @@ class Test_Lexer < Test::Unit::TestCase
     assert_equal 'xylem initial', foo.terms.first.tags_named('synonym').first.value
     assert_equal 'xylem mother cell', foo.terms.first.tags_named('synonym')[1].value
     assert_equal 'CL:0000274', foo.terms.first.tags_named('alt_id').first.value
+    assert_equal 2, foo.terms.first.relationships.size
+    assert_equal(['CL:0000272', 'CL:0000610'], foo.terms.first.relationships.collect{|r| r[1]}.sort)
+    assert_equal(['is_a', 'is_a'], foo.terms.first.relationships.collect{|r| r[0]}.sort)
   end
   def test_parse_term
-     lexer = OboParser::Lexer.new("[Term]")
-     assert lexer.pop(OboParser::Tokens::Term)
+    lexer = OboParser::Lexer.new("[Term]")
+    assert lexer.pop(OboParser::Tokens::Term)
   end
   def test_xref_list
-     lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
-     assert t = lexer.pop(OboParser::Tokens::XrefList)
-     hsh = {'foo' => 'bar', 'stuff' => 'things'}
-     assert_equal hsh, t.value
+    lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
+    assert t = lexer.pop(OboParser::Tokens::XrefList)
+    assert_equal( {'foo' => 'bar', 'stuff' => 'things'} , t.value)
+  end
+  def test_relationship_tag
+    lexer = OboParser::Lexer.new("relationship: develops_from CL:0000333 ! neural crest cell")
+    assert t = lexer.pop(OboParser::Tokens::RelationshipTag)
+    assert_equal 'develops_from', t.relation
+    assert_equal 'CL:0000333', t.related_term
+    assert_equal 'relationship', t.tag
+    lexer = OboParser::Lexer.new("relationship: develops_from CL:0000333")
+    assert t = lexer.pop(OboParser::Tokens::RelationshipTag)
+    assert_equal 'develops_from', t.relation
+    assert_equal 'CL:0000333', t.related_term
+    assert_equal 'relationship', t.tag
+    lexer = OboParser::Lexer.new("is_a: CL:0000333 ! Foo")
+    assert t = lexer.pop(OboParser::Tokens::IsATag)
+    assert_equal 'is_a', t.relation
+    assert_equal 'CL:0000333', t.related_term
+    assert_equal 'Foo', t.comment
+    lexer = OboParser::Lexer.new("disjoint_from: CL:0000333")
+    assert t = lexer.pop(OboParser::Tokens::DisjointFromTag)
+    assert_equal 'disjoint_from', t.relation
+    assert_equal 'CL:0000333', t.related_term
+    assert_equal "", t.comment
+    lexer = OboParser::Lexer.new("relationship: part_of CL:0000333 ! Foo")
+    assert t = lexer.pop(OboParser::Tokens::RelationshipTag)
+    assert_equal 'part_of', t.relation
+    assert_equal 'CL:0000333', t.related_term
+    assert_equal 'Foo', t.comment
   end
   def test_tagvaluepair
-     lexer = OboParser::Lexer.new("id: PATO:0000179")
-     assert lexer.pop(OboParser::Tokens::TagValuePair)
+    lexer = OboParser::Lexer.new("id: PATO:0000179")
+    assert lexer.pop(OboParser::Tokens::TagValuePair)
   end
   def test_tagvaluepair_with_comments_and_xrefs
@@ -123,6 +172,22 @@ class Test_Lexer < Test::Unit::TestCase
     assert_equal([], t.xrefs)
   end
+  def test_that_xref_lists_parse_as_part_of_tagvalue_pair
+    lexer = OboParser::Lexer.new('def: "Foo and the bar, and stuff, and things.  More stuff, and things!" [GO_REF:0000031 "Foo!" , GOC:msz {some=trailingmodifier}, GOC:tfm, ISBN:9780781765190 "Fundamental Immunology!, 6ed (Paul,ed), 2003", PMID:16014527] {qualifier=foo} ! and a comment')
+    assert t = lexer.pop(OboParser::Tokens::TagValuePair)
+    assert_equal 'def', t.tag
+    assert_equal 'Foo and the bar, and stuff, and things.  More stuff, and things!', t.value
+    assert_equal(['GO_REF:0000031', 'GOC:msz', 'GOC:tfm', 'ISBN:9780781765190', 'PMID:16014527'], t.xrefs)
+  end
+  def test_crummy_space_filled_xrefs
+    lexer = OboParser::Lexer.new('def: "A quality inhering in a bearer by virtue of emitting light during exposure to radiation from an external source." [The Free Online dictionary:The Free Online dictionary "www.thefreedictionary.com/ -"]')
+    assert t = lexer.pop(OboParser::Tokens::TagValuePair)
+    assert_equal 'def', t.tag
+    assert_equal 'A quality inhering in a bearer by virtue of emitting light during exposure to radiation from an external source.', t.value
+    assert_equal(['The Free Online dictionary:The Free Online dictionary'], t.xrefs)
+  end
 end
 class Test_Parser < Test::Unit::TestCase
@@ -153,13 +218,7 @@ class Test_Parser < Test::Unit::TestCase
     assert_equal 'xylem mother cell', tmp[1].value
     assert_equal([], tmp[1].xrefs)
-    assert_equal 2, foo.terms[9].tags_named('is_a').size
-  end
-  def teardown
-    @of = nil
+    assert_equal 2, foo.terms[9].relationships.size
   end
   def test_file_completes_without_typedefs
@@ -167,5 +226,9 @@ class Test_Parser < Test::Unit::TestCase
     assert foo = parse_obo_file(@of2)
   end
+  def teardown
+    @of = nil
+  end
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: obo_parser
 version: !ruby/object:Gem::Version
-  hash: 21
+  hash: 27
   prerelease:
   segments:
   - 0
   - 3
-  - 3
-  version: 0.3.3
+  - 4
+  version: 0.3.4
 platform: ruby
 authors:
 - mjy
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-04-07 00:00:00 -04:00
+date: 2011-04-11 00:00:00 -04:00
 default_executable:
 dependencies: []