RubyGems - obo_parser - Versions diffs - 0.3.4 → 0.3.5 - Mend

obo_parser 0.3.4 → 0.3.5

Files changed (12) hide show

data/README.rdoc CHANGED Viewed

@@ -1,6 +1,6 @@
 = obo_parser
-A simple Ruby gem for parsing OBO 1.2 formatted ontology files.  Useful for reporting, comparing, and mapping data to other databases.  There is presently no functionality for logical inference across the ontology.
+A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files.  Useful for reporting, comparing, and mapping data to other databases.  There is presently no functionality for logical inference across the ontology.
 == Installation
@@ -8,6 +8,8 @@ A simple Ruby gem for parsing OBO 1.2 formatted ontology files.  Useful for repo
 == Use
+=== General
     require 'rubygems'
     require 'obo_parser'
     foo = parse_obo_file(File.read('my_ontology.obo'))  # => An OboParser instance
@@ -36,13 +38,20 @@ A simple Ruby gem for parsing OBO 1.2 formatted ontology files.  Useful for repo
     foo.terms.first.relationships                       # => [['relation_ship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
+=== Convenience methods
+    foo.term_hash                                       # => { term (String) => id (String), ... for each [Term] in the file. } !! Assumes term names are unique; they might not be, in which case you get key collisions.
+    foo.id_hash                                         # => { id (String) => term (String), ... for each [Term] in the file. }
 See also /test/test_obo_parser.rb
 == Utilities
-!! UTILTIES ARE PRESENTLY BORKED !!
+A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb.  For example, shared labels across sets of ontologies can be found and returned.
+== Documentation
-A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb.  For example, shared labels across sets of ontologies can be found and returned.
+Code documentation is slowly being formalized using Yard.
 == Copyright

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.3.4
1	+ 0.3.5

data/lib/tokens.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module OboParser::Tokens
   class Token
-    # this allows access the the class attribute regexp, without using a class variable
+    # this allows access to the class attribute regexp, without using a class variable
     class << self; attr_reader :regexp; end
     attr_reader :value
     def initialize(str)
@@ -17,7 +17,7 @@ module OboParser::Tokens
     @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
   end
-  # Token eeds simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
+  # Token needs simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
   # tokens for individual tags that don't conform to the pattern used for def: tags.
   # The code can't presently handle escaped characters (like \,), as bizarrely found in some OBO files.
   class TagValuePair < Token
@@ -66,6 +66,7 @@ module OboParser::Tokens
         qq = 0 # some failsafes
         while xref_list.length > 0
           qq += 1
+          debugger if qq == 499
           raise "#{xref_list}" if qq > 500
           xref_list.gsub!(/\A\s*,\s*/, '')

data/lib/utilities.rb CHANGED Viewed

@@ -4,79 +4,61 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
 module OboParser::Utilities
-  # Example usage
-	#	of1 = File.read('hao1.obo')
-	#	of2 = File.read('hao2.obo')
-	#	of3 = File.read('hao3.obo')
-	#	of4 = File.read('hao4.obo')
+  # Summarizes labels used by id in a two column tab delimited format
+  # Providing a cutoff will report only those ids/labels with > 1 label per id
+  # Does not (yet) include reference to synonyms, this could be easily extended.
   #
-  #  OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
-  def self.dump_comparison_by_id(files = []) # :yields: String
+  #== Example use
+  #		of1 = File.read('foo1.obo')
+  #		of2 = File.read('foo2.obo')
+  #		of3 = File.read('foo3.obo')
+  #		of4 = File.read('foo4.obo')
+  #
+  #  OboParser::Utilities.dump_comparison_by_id(0,[of1, of2, of3, of4])
+  #
+  # @param [Integer] cutoff only Term ids with > cutoff labels will be reported
+  # @param [Array] files an Array of read files
+  # @return [String] the translation in tab delimited format
+  def self.dump_comparison_by_id(cutoff = 0, files = [])
+    return '' if files.size < 1
     of = []
     files.each_with_index do |f, i|
       of[i] = parse_obo_file(f)
     end
     all_data = {}
     of.each do |f|
       tmp_hash = f.id_hash
       tmp_hash.keys.each do |id|
         if all_data[id]
-          all_data[id].push tmp_hash[id]
+          all_data[id].push(tmp_hash[id])
         else
           all_data[id] = [tmp_hash[id]]
         end
       end
     end
-    puts "\nA list of all labels used across all submitted files for a given ID\n\n"
     all_data.keys.sort.each do |k|
-      if all_data[k].uniq.size > 1
-        puts "#{k}\t: #{all_data[k].uniq.join(', ')}"
+      if all_data[k].uniq.size > cutoff
+        puts "#{k}\t#{all_data[k].uniq.join(', ')}"
       end
     end
   end
-  # infile is a tab delimited 2 column file that contains IDs in the from FOO_1234
-  # The file is replicated to STDOUT replacing the ID with the Term
-  def self.alignment_translate(infile = nil) # :yields: String
-    agreement = ARGV[0]
-    raise "Provide a file with comparison." if agreement.nil?
-    comparison = File.read(agreement)
-    obo_files = Dir.entries('.').inject([]){|sum, a| sum.push( a =~ /\.obo\Z/ ? a : nil)}.compact!
-    identifiers = {}
-    obo_files.each do |f|
-      puts "Reading: #{f}"
-      identifiers.merge!(  parse_obo_file(File.read(f)).id_hash )
-    end
-    comparison.each do |l|
-      v1, v2 = l.split("\t")
-      # puts "#{v1} - #{v2}"
-      next if v1.nil? || v2.nil?
-      v1.gsub!(/_/, ":")
-      v1.strip!
-      v2.gsub!(/_/, ":")
-      v2.strip!
-      puts (identifiers[v1].nil? ? 'NOT FOUND' : identifiers[v1]) +
-            "\t" +
-           (identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
-    end
-  end
-  # Returns labels found in all passed ontologies
-  # Usage:
+  # Returns all labels found in all passed ontologies. Does not yet include synonyms.
+  #
+  #== Example use
   #  of1 = File.read('fly_anatomy.obo')
   #  of2 = File.read('hao.obo')
   #  of3 = File.read('mosquito_anatomy.obo')
-  #  shared_labels([of1, of6])
-  def self.shared_labels(files = []) # :yields: String
+  #
+  #  OboParser::Utilities.shared_labels([of1, of3])
+  #
+  # @param [Array] files an Array of read files
+  # @return [String] labels, one per line
+  def self.shared_labels(files = [])
     comparison = {}
     files.each do |f|
@@ -91,17 +73,175 @@ module OboParser::Utilities
         end
       end
     end
-   match = []
+    match = []
     comparison.keys.each do |k|
       if comparison[k] == files.size
         match.push k
       end
     end
-   puts  match.sort.join("\n")
-   puts "\n#{match.length} total."
+    puts  match.sort.join("\n")
+    puts "\n#{match.length} total."
   end
+  #== Two column translation tools
+HOMOLONTO_HEADER = %{
+format-version: 1.2
+auto-generated-by: obo_parser
+default-namespace: fix_me
+[Typedef]
+id: OGEE:has_member
+name: has_member
+is_a: OBO_REL:relationship
+def: "C has_member C', C is an homology group and C' is a biological object" []
+comment: "We leave open the possibility that an homology group is a biological object. Thus, an homology group C may have C' has_member, with C' being an homology group."
+is_transitive: true
+is_anti_symmetric: true
+}
+  # Takes a two column input file, references it to two ontologies, and provides a report.
+  #
+  #== Example use
+  #  file = File.read('HAO_TGMA_list.txt')
+  #  col1_obo = File.read('hao.obo')
+  #  col2_obo = File.read('tgma.obo')
+  #  column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
+  #
+  #  OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
+  #== Output types
+  # There are several output report types
+  #   :xls - Translates the columns in the data_file to the option passed in :translate_to, the first matching against col1_obo, the second against col2_obo.  Returns an Excel file.
+  #   :homolonto - Generates a homolonto compatible file to STDOUT
+  #   :cols - Prints a two column format to STDOUT
+  #
+  # @param [Hash] options options.
+  # @param [Symbol] data the two column data file.
+  # @return [String] the translation in tab delimited format.
+  def self.column_translate(options = {})
+    opt = {
+      :data => nil,
+      :col1_obo => nil,
+      :col2_obo => nil,
+      :translate_to => :id,    # also :label
+      :output => :cols,        # also :xls, :homolonto
+      :output_filename => 'foo',
+      :index_start => 0
+    }.merge!(options)
+    c1obo = parse_obo_file(opt[:col1_obo])
+    c2obo = parse_obo_file(opt[:col2_obo])
+    case opt[:output]
+    when :xls
+      Spreadsheet.client_encoding = 'UTF-8'
+      book = Spreadsheet::Workbook.new
+      sheet = book.create_worksheet
+    when :homolonto
+      s = HOMOLONTO_HEADER
+      opt[:translate_to] = :id # force this in this mode
+    end
+    i = opt[:index_start]
+    v1 = nil # a label like 'head'
+    v2 = nil
+    c1 = nil # an id 'FOO:123'
+    c2 = nil
+    opt[:data].split(/\n/).each do |row|
+      i += 1
+      c1, c2 =  row.split(/\t/).map(&:strip)
+      if c1.nil? || c2.nil?
+        puts
+        next
+      end
+      # the conversion
+      if opt[:translate_to] == :id
+        if c1 =~ /.*\:.*/ # it's an id, leave it
+          v1 = c1
+        else
+          v1 = c1obo.term_hash[c1]
+        end
+        if c2 =~ /.*\:.*/
+          v2 = c2
+        else
+          v2 = c2obo.term_hash[c2]
+        end
+      else
+        if c1 =~ /.*\:.*/
+          v1 = c1obo.id_hash[c1]
+        else
+          v1 = c1
+        end
+        if c2 =~ /.*\:.*/
+          v2 = c2obo.id_hash[c2]
+        else
+          v2 = c2
+        end
+      end
+      case opt[:output]
+      when :cols
+        puts "#{v1}\t#{v2}"
+      when :xls
+        sheet[i,0] = v1
+        sheet[i,1] = OboParser::Utilities.term_stanza_from_file(v1, opt[:col1_obo])
+        sheet[i,2] = v2
+        sheet[i,3] = OboParser::Utilities.term_stanza_from_file(v2, opt[:col2_obo])
+      when :homolonto
+        s << OboParser::Utilities.homolonto_stanza(i, c1obo.id_hash[v1] , v1, v2) # "#{c1obo.id_hash[v1]} ! #{c2obo.id_hash[v2]}"
+        s << "\n\n"
+      end
+    end
+    case opt[:output]
+    when :xls
+      book.write "#{opt[:output_filename]}.xls"
+    when :homolonto
+      puts s + "\n"
+    end
+    true
+  end
+  # Returns a HomolOnto Stanza
+  #
+  # @param [String] id an externally tracked id for the id: tag like '00001'
+  # @param [String] name a name for the name: tag
+  # @param [Array] members an Array of 2 or more members for the relationship: has_member tag like ['FOO:123', 'BAR:456']
+  # @return [String] the stanza requested
+  def self.homolonto_stanza(id, name, *members)
+    return 'NOT ENOUGH RELATIONSHIPS' if members.length < 2
+    s = []
+    s << '[Term]'
+    s << "id: HOG:#{id}"
+    s << "name: #{name}"
+    members.each do |m|
+      s << "relationship: has_member #{m}"
+    end
+    s.join("\n")
+  end
+#== Helper methods that don't require the obo_parser library
+  # Given a Term id and a String representing an OBO file returns that stanza.
+  #
+  # @param [String] id a Term id like 'FOO:123'
+  # @param [String] file an OBO file as a String like File.read('my.obo')
+  # @return [String] the stanza requested
+  def self.term_stanza_from_file(id, file)
+    foo = ""
+    file =~ /(^\[Term\]\s*?id:\s*?#{id}.*?)(^\[Term\]|^\[Typedef\])/im
+    foo = $1 if !$1.nil?
+    foo.gsub(/\n\r/,"\n")
+  end
 end

data/obo_parser.gemspec CHANGED Viewed

@@ -1,51 +1,49 @@
 # Generated by jeweler
 # DO NOT EDIT THIS FILE DIRECTLY
-# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
 Gem::Specification.new do |s|
   s.name = %q{obo_parser}
-  s.version = "0.3.4"
+  s.version = "0.3.5"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["mjy"]
-  s.date = %q{2011-04-11}
+  s.date = %q{2011-06-09}
   s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file.  OBO version 1.2 is targeted, though this should work for 1.0. }
   s.email = %q{diapriid@gmail.com}
   s.extra_rdoc_files = [
     "LICENSE",
-     "README.rdoc"
+    "README.rdoc"
   ]
   s.files = [
     ".document",
-     ".gitignore",
-     "LICENSE",
-     "README.rdoc",
-     "Rakefile",
-     "VERSION",
-     "init.rb",
-     "install.rb",
-     "lib/lexer.rb",
-     "lib/obo_parser.rb",
-     "lib/parser.rb",
-     "lib/tokens.rb",
-     "lib/utilities.rb",
-     "obo_parser.gemspec",
-     "tasks/obo_parser_tasks.rake",
-     "test/cell.obo",
-     "test/obo_1.0_test.txt",
-     "test/obo_1.0_test_wo_typedefs.txt",
-     "test/test_obo_parser.rb",
-     "uninstall.rb"
+    "LICENSE",
+    "README.rdoc",
+    "Rakefile",
+    "VERSION",
+    "init.rb",
+    "install.rb",
+    "lib/lexer.rb",
+    "lib/obo_parser.rb",
+    "lib/parser.rb",
+    "lib/tokens.rb",
+    "lib/utilities.rb",
+    "obo_parser.gemspec",
+    "tasks/obo_parser_tasks.rake",
+    "test/cell.obo",
+    "test/go.obo",
+    "test/hao.obo",
+    "test/obo_1.0_test.txt",
+    "test/obo_1.0_test_wo_typedefs.txt",
+    "test/test_obo_parser.rb",
+    "test/tgma.obo",
+    "uninstall.rb"
   ]
   s.homepage = %q{http://github.com/mjy/obo_parser}
-  s.rdoc_options = ["--charset=UTF-8"]
   s.require_paths = ["lib"]
-  s.rubygems_version = %q{1.5.3}
+  s.rubygems_version = %q{1.7.2}
   s.summary = %q{A simple OBO file handler.}
-  s.test_files = [
-    "test/test_obo_parser.rb"
-  ]
   if s.respond_to? :specification_version then
     s.specification_version = 3

data/test/cell.obo CHANGED Viewed

@@ -4365,7 +4365,7 @@ is_a: CL:0000255 ! eukaryotic cell
 [Term]
 id: CL:0000611
 name: eosinophil progenitor cell
-comment: These cells are CD34-positive, CD45RA-negative, CD71-negative, and lineage-negative (CD2, CD3 epsilon, CD4, CD5, CD8a, CD14, CD19, CD20, integrin alpha-M, NCAM-1, SCA�1, Ly6G, Ly76).
+comment: These cells are CD34-positive, CD45RA-negative, CD71-negative, and lineage-negative (CD2, CD3 epsilon, CD4, CD5, CD8a, CD14, CD19, CD20, integrin alpha-M, NCAM-1, SCA-1, Ly6G, Ly76).
 synonym: "CFU-Eo" RELATED []
 synonym: "colony forming unit eosinophil" RELATED []
 synonym: "EoP" EXACT [PMID:15955840, PMID:19114669]