RubyGems - rbbt-sources - Versions diffs - 0.2.2 → 0.3.1 - Mend

rbbt-sources 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

data/lib/rbbt/sources/COSTART.rb +2 -3
data/lib/rbbt/sources/CTCAE.rb +1 -1
data/lib/rbbt/sources/biomart.rb +32 -32
data/lib/rbbt/sources/entrez.rb +14 -10
data/lib/rbbt/sources/go.rb +9 -8
data/lib/rbbt/sources/organism.rb +36 -10
data/lib/rbbt/sources/organism/sequence.rb +337 -0
data/lib/rbbt/sources/polysearch.rb +5 -5
data/share/install/Organism/Hsa/Rakefile +7 -68
data/share/install/Organism/Sce/Rakefile +4 -70
data/share/install/Organism/organism_helpers.rb +305 -0
data/share/install/lib/helpers.rb +5 -5
data/test/rbbt/sources/test_biomart.rb +7 -6
data/test/rbbt/sources/test_entrez.rb +3 -3
data/test/rbbt/sources/test_organism.rb +32 -3
data/test/rbbt/sources/test_pubmed.rb +1 -1
metadata +7 -6
data/lib/rbbt/sources/Reactome.rb +0 -16

data/lib/rbbt/sources/COSTART.rb CHANGED

@@ -2,8 +2,7 @@ require 'rbbt-util'
 module COSTART
-  Rbbt.claim "COSTART",
-    Proc.new do
+  Rbbt.share.databases.COSTART.COSTART.define_as_proc do
       terms = ["#COSTART Terms"]
       Open.open('http://hedwig.mgh.harvard.edu/biostatistics/files/costart.html').lines.each do |line|
         puts line
@@ -12,5 +11,5 @@ module COSTART
       end
       terms * "\n"
-    end, 'COSTART'
+  end
 end

data/lib/rbbt/sources/CTCAE.rb CHANGED

@@ -2,5 +2,5 @@ require 'rbbt-util'
 require 'rbbt/util/excel2tsv'
 module CTCAE
-  Rbb.claim "CTCAE", TSV.excel2tsv('http://evs.nci.nih.gov/ftp1/CTCAE/CTCAE_4.03_2010-06-14.xls'), 'CTCAE'
+  Rbbt.share.CTCAE.CTCAE.define_as_url TSV.excel2tsv('http://evs.nci.nih.gov/ftp1/CTCAE/CTCAE_4.03_2010-06-14.xls')
 end

data/lib/rbbt/sources/biomart.rb CHANGED

@@ -1,5 +1,6 @@
-require 'rbbt-util'
+require 'rbbt/util/tsv'
 require 'rbbt/util/log'
+require 'cgi'
 # This module interacts with BioMart. It performs queries to BioMart and
 # synthesises a hash with the results. Note that this module connects to the
@@ -27,21 +28,23 @@ module BioMart
   EOT
   def self.set_archive(date)
-    @archive_url = BIOMART_URL.sub(/www\.biomar\./, date + '.archive.ensemble')
+    @archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
+    Log.debug "Using Archive URL #{ @archive_url }"
   end
   def self.unset_archive
+    Log.debug "Restoring current version URL #{BIOMART_URL}"
     @archive_url = nil
   end
   def self.get(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
+    repeats = true
     attrs   ||= []
     filters ||= ["with_#{main}"]
-    data    ||= {}
     query = @@biomart_query_xml.dup
     query.sub!(/<!--DATABASE-->/,database)
-    query.sub!(/<!--FILTERS-->/, filters.collect{|name| "<Filter name = \"#{ name }\" excluded = \"0\"/>"}.join("\n") )
+    query.sub!(/<!--FILTERS-->/, filters.collect{|name, v| v.nil? ? "<Filter name = \"#{ name }\" excluded = \"0\"/>" : "<Filter name = \"#{ name }\" value = \"#{Array === v ? v * "," : v}\"/>" }.join("\n") )
     query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
     query.sub!(/<!--ATTRIBUTES-->/, attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n") )
@@ -55,23 +58,18 @@ module BioMart
       raise BioMart::QueryError, response
     end
-    response.each_line{|l|
-      parts = l.chomp.split(/\t/)
-      main = parts.shift
-      next if main.nil? || main.empty?
-      data[main] ||= {}
-      attrs.each{|name|
-        value = parts.shift
-        data[main][name] ||= []
-        next if value.nil? or value.empty?
-        if data[main][name]
-          data[main][name] = [value]
-        else
-          data[main][name] << value unless data[main][name].include? value
-        end
-      }
-    }
+    result_file = TmpFile.tmp_file
+    Open.write(result_file, response)
+    if data.nil?
+      data = result_file
+    else
+      new_datafile = TmpFile.tmp_file
+      TSV.paste_merge data, result_file, new_datafile
+      FileUtils.rm data
+      data = new_datafile
+      FileUtils.rm result_file
+    end
     data
   end
@@ -95,8 +93,9 @@ module BioMart
   def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
     open_options = Misc.add_defaults open_options, :nocache => false
     attrs   ||= []
-    data    ||= {}
+    open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
     Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
     max_items = 2
@@ -115,21 +114,22 @@ module BioMart
     Log.low "Chunks: #{chunks.length}"
     chunks.each_with_index{|chunk,i|
-      Log.low "Chunk #{ i }: [#{chunk * ", "}]"
+      Log.low "Chunk #{ i + 1 } / #{chunks.length}: [#{chunk * ", "}]"
       data = get(database, main, chunk, filters, data, open_options)
     }
-    data
+    result = TSV.new(data, open_options)
+    result.key_field = main
+    result.fields = attrs
+    result.filename = "BioMart: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}"
+    FileUtils.rm data
+    result
   end
   def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
     codes = attrs.collect{|attr| attr[1]}
-    data = query(database, main.last, codes, filters, data, open_options)
-    tsv = TSV.new({})
-    data.each do |key, info|
-      tsv[key] = info.values_at(*codes)
-    end
+    tsv = query(database, main.last, codes, filters, data, open_options)
     tsv.key_field = main.first
     tsv.fields    = attrs.collect{|attr| attr.first}

data/lib/rbbt/sources/entrez.rb CHANGED

@@ -5,29 +5,29 @@ require 'set'
 module Entrez
-  Rbbt.claim "gene_info", 'ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz', 'databases/entrez'
-  Rbbt.claim "gene2pubmed", 'ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz', 'databases/entrez'
+  Rbbt.share.databases.entrez.gene_info.define_as_url 'ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz'
+  Rbbt.share.databases.entrez.gene2pubmed.define_as_url 'ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz'
   def self.entrez2native(taxs, options = {})
-    options = Misc.add_defaults options, :key => 1, :others => 5, :persistence => true, :merge => true
+    options = Misc.add_defaults options, :key => 1, :fields => 5, :persistence => true, :merge => true
     taxs = [taxs] unless Array === taxs
-    options.merge! :grep => taxs
-    tsv = TSV.new(Rbbt.files.databases.entrez.gene_info, :flat, options)
+    options.merge! :grep => taxs.collect{|t| "^#{ t }\t"}
+    tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
     tsv.key_field = "Entrez Gene ID"
     tsv.fields    = ["Native ID"]
     tsv
   end
   def self.entrez2pubmed(taxs)
-    options = {:key => 1, :others => 2, :persistence => true, :merge => true}
+    options = {:key => 1, :fields => 2, :persistence => true, :merge => true}
     taxs = [taxs] unless taxs.is_a?(Array)
     taxs = taxs.collect{|t| t.to_s}
-    options.merge! :grep => taxs
+    options.merge! :grep => taxs.collect{|t| "^#{ t }\t"}
-    TSV.new(Rbbt.files.databases.entrez.gene2pubmed, :flat, options)
+    Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
   end
   class Gene
@@ -132,7 +132,11 @@ module Entrez
     when Entrez::Gene === gene
       gene_text = gene.text
     when String === gene || Fixnum === gene
-      gene_text =  get_gene(gene).text
+      begin
+        gene_text =  get_gene(gene).text
+      rescue CMD::CMDError
+        return 0
+      end
     else
       return 0
     end

data/lib/rbbt/sources/go.rb CHANGED

@@ -4,19 +4,20 @@ require 'rbbt-util'
 # now all it does is provide a translation form id to the actual names.
 module GO
-  Rbbt.claim :gene_ontology, 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo', 'databases/GO'
-  Rbbt.claim :goslim_generic, 'http://www.geneontology.org/GO_slims/goslim_generic.obo', 'databases/GO'
+  Rbbt.share.databases.GO.gene_ontology.define_as_url 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo'
+  Rbbt.share.databases.GO.gslim_generic.define_as_url 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
   MULTIPLE_VALUE_FIELDS = %w(is_a)
-  TSV_GENE_ONTOLOGY = File.join(TSV.cachedir, 'gene_ontology')
+  TSV_GENE_ONTOLOGY = File.join(Persistence.cachedir, 'gene_ontology')
   # This method needs to be called before any translations can be made, it is
   # called automatically the first time the id2name method is called. It loads
   # the gene_ontology.obo file and extracts all the fields, although right now,
   # only the name field is used.
   def self.init
-    info = TCHash.new(TSV_GENE_ONTOLOGY, true)
-    File.open(Rbbt.find_datafile('gene_ontology')).read.split(/\[Term\]/).each{|term|
+    init = Persistence.persist_tsv('gene_ontology', :Misc) do
+      info = {}
+      Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
         term_info = {}
         term.split(/\n/). select{|l| l =~ /:/}.each{|l|
@@ -32,12 +33,12 @@ module GO
         next if term_info["id"].nil?
         info[term_info["id"]] = term_info
       }
-    info.close
+      info
+    end
   end
   def self.info
-    self.init unless File.exists? TSV_GENE_ONTOLOGY
-    TCHash.get(TSV_GENE_ONTOLOGY)
+    self.init
   end
   def self.goterms

data/lib/rbbt/sources/organism.rb CHANGED

@@ -1,19 +1,39 @@
 require 'rbbt-util'
-require 'rbbt/util/data_module'
+require 'rbbt/util/resource'
 module Organism
+  extend Resource
+  relative_to Rbbt, "share/organisms"
   class OrganismNotProcessedError < StandardError; end
   def self.datadir(org)
     File.join(Rbbt.datadir, 'organisms', org)
   end
-  def self.normalize(org, list, field = nil, others = nil, options = {})
+  def self.attach_translations(org, tsv, target = nil, fields = nil, options = {})
+    Log.high "Attaching Translations for #{ org.inspect }, target #{target.inspect}, fields #{fields.inspect}"
+    options = Misc.add_defaults options, :persistence => true, :case_insensitive => false
+    options.merge! :key    => target unless target.nil?
+    options.merge! :fields => fields unless fields.nil?
+    index = identifiers(org).tsv options
+    tsv.attach index, [:key]
+  end
+  def self.normalize(org, list, target = nil, fields = nil, options = {})
     return [] if list.nil? or list.empty?
     options = Misc.add_defaults options, :persistence => true, :case_insensitive => true, :double => false
     double = Misc.process_options options, :double
+    options.merge! :target => target unless target.nil?
+    options.merge! :fields => fields unless fields.nil?
+    index = identifiers(org).index options
     if Array === list
       if double
         index.values_at *list
@@ -36,11 +56,11 @@ module Organism
   end
   def self.organisms
-    Dir.glob(File.join(PKGData.sharedir_for_file(__FILE__), 'install/Organism/*/Rakefile')).collect{|f| File.basename(File.dirname(f))}
+    Dir.glob(File.join(Rbbt.share.organisms.find, '*')).collect{|f| File.basename(f)}
   end
   def self.name(organism)
-    Open.read(Organism.scientific_name(organism)).strip
+    Organism.scientific_name(organism).read.strip
   end
   def self.organism(name)
@@ -48,9 +68,15 @@ module Organism
       organism == name or Organism.name(organism) =~ /#{ name }/i
     }.first
   end
-  extend DataModule
-  Hsa = with_key('Hsa')
-  Sce = with_key('Sce')
+  ["Hsa", "Sce"].each do |organism|
+    rakefile = Rbbt["share/install/Organism/#{ organism }/Rakefile"]
+    rakefile.lib_dir = Resource.caller_lib_dir __FILE__
+    rakefile.pkgdir = 'phgx'
+    Organism[organism].define_as_rake rakefile
+    module_eval "#{ organism } = with_key '#{organism}'"
+  end
 end

data/lib/rbbt/sources/organism/sequence.rb ADDED

@@ -0,0 +1,337 @@
+require 'rbbt/sources/organism'
+require 'rbbt/util/workflow'
+require 'bio'
+# Sequence analyses
+module Organism
+  extend WorkFlow
+  def self.coding_transcripts_for_exon(org, exon, exon_transcripts, transcript_info)
+    exon_transcripts ||= Organism.transcript_exons(org).tsv(:double, :key => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :merge => true, :persistence => true )
+    transcript_info  ||= Organism.transcripts.tsv(org).tsv(:list, :persistence => true )
+    transcripts = exon_transcripts[exon].first
+    transcripts.select{|transcript| transcript_info[transcript]["Ensembl Protein ID"].any?}
+  end
+  def self.codon_at_transcript_position(org, transcript, offset, transcript_sequence = nil, transcript_5utr = nil)
+    transcript_sequence ||= Organism.transcript_sequence(org).tsv(:single, :persistence => true)
+    transcript_5utr ||= Organism.transcript_5utr(org).tsv(:single, :persistence => true, :cast => 'to_i')
+    utr5 = transcript_5utr[transcript]
+    raise "UTR5 for transcript #{ transcript } was missing" if utr5.nil?
+    return nil if utr5 > offset
+    sequence = transcript_sequence[transcript]
+    raise "Sequence for transcript #{ transcript } was missing" if sequence.nil? if sequence.nil?
+    ccds_offset = offset - utr5
+    return nil if ccds_offset > sequence.length
+    range = (utr5..-1)
+    sequence = sequence[range]
+    codon = ccds_offset / 3
+    codon_offset =  ccds_offset % 3
+    [sequence[(codon * 3)..((codon + 1) * 3 - 1)], codon_offset, codon]
+  end
+  def self.codon_change(allele, codon, offset)
+    original = Bio::Sequence::NA .new(codon).translate
+    codon = codon.dup
+    codon[offset] = allele
+    new = Bio::Sequence::NA .new(codon).translate
+    [original, new]
+  end
+  def self.genes_at_chromosome_positions(org, chromosome, positions)
+    chromosome = chromosome.to_s
+    chromosome_bed = Persistence.persist(Organism.gene_positions(org), "Gene_positions[#{chromosome}]", :fwt, :chromosome => chromosome, :range => true) do |file, options|
+      tsv = file.tsv(:persistence => false, :type => :list)
+      tsv.select("Chromosome Name" => chromosome).collect do |gene, values|
+        [gene, values.values_at("Gene Start", "Gene End").collect{|p| p.to_i}]
+      end
+    end
+    if Array === positions
+      positions.collect{|position| pos = chromosome_bed[position]; pos.nil? ? nil : pos.first}
+    else
+      pos = chromosome_bed[positions];
+      pos.nil? ? nil : pos.first
+    end
+  end
+  def self.genes_at_genomic_positions(org, positions)
+    positions = [positions] unless Array === positions.first
+    genes = []
+    chromosomes = {}
+    indices     = {}
+    positions.each_with_index do |info,i|
+      chr, pos = info
+      chromosomes[chr] ||= []
+      indices[chr] ||= []
+      chromosomes[chr] << pos
+      indices[chr] << i
+    end
+    chromosomes.each do |chr, pos_list|
+      chr_genes = genes_at_chromosome_positions(org, chr, pos_list)
+      chr_genes.zip(indices[chr]).each do |gene, index| genes[index] = gene end
+    end
+    genes
+  end
+  def self.exons_at_chromosome_positions(org, chromosome, positions)
+    chromosome = chromosome.to_s
+    chromosome_bed = Persistence.persist(Organism.exons(org), "Exon_positions[#{chromosome}]", :fwt, :chromosome => chromosome, :range => true) do |file, options|
+      tsv = file.tsv(:persistence => true, :type => :list)
+      tsv.select("Chromosome Name" => chromosome).collect do |exon, values|
+        [exon, values.values_at("Exon Chr Start", "Exon Chr End").collect{|p| p.to_i}]
+      end
+    end
+    if Array === positions
+      positions.collect{|position|
+        chromosome_bed[position];
+      }
+    else
+      chromosome_bed[positions];
+    end
+  end
+  def self.exons_at_genomic_positions(org, positions)
+    positions = [positions] unless Array === positions.first
+    exons = []
+    chromosomes = {}
+    indices     = {}
+    positions.each_with_index do |info,i|
+      chr, pos = info
+      chromosomes[chr] ||= []
+      indices[chr] ||= []
+      chromosomes[chr] << pos
+      indices[chr] << i
+    end
+    chromosomes.each do |chr, pos_list|
+      chr_exons = exons_at_chromosome_positions(org, chr, pos_list)
+      chr_exons.zip(indices[chr]).each do |exon, index| exons[index] = exon end
+    end
+    exons
+  end
+  def self.exon_offset_in_transcript(org, exon, transcript, exons = nil, transcript_exons = nil)
+    exons            ||= Organism.exons(org).tsv(:persistence => true)
+    transcript_exons ||= Organism.transcript_exons(org).tsv(:double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"], :persistence => true)
+    sizes = [0]
+    rank = nil
+    transcript_exons[transcript].zip_fields.each do |_exon, _rank|
+      _rank = _rank.to_i
+      s, e = exons[_exon].values_at("Start", "End")
+      size = e.to_i - s.to_i + 1
+      sizes[_rank] =  size
+      rank = _rank if _exon == exon
+    end
+    if not rank.nil?
+      sizes[0..rank - 1].inject(0){|e,acc| acc += e}
+    else
+      nil
+    end
+  end
+  def self.exon_transcript_offsets(org, exons, exon_offsets = nil, exon_info = nil)
+    exon_info       ||= Organism.exons(org).tsv(:persistence => true)
+    exon_offsets    ||= Organism.exon_offsets(org).tsv(:double, :persistence => true)
+    exons = [exons] unless Array === exons
+    transcript_offsets = {}
+    exons.each do |exon|
+      transcript_offsets[exon] ||= {}
+      offsets = exon_offsets[exon].zip_fields
+      offsets.collect do |transcript, offset|
+        next if transcript.empty?
+        transcript_offsets[exon][transcript] = offset.to_i
+      end
+    end
+    transcript_offsets
+  end
+  def self.genomic_position_transcript_offsets(org, positions, exon_offsets = nil, exon_start = nil, exon_end = nil, exon_strand = nil)
+    exon_offsets ||= Organism.exon_offsets(org).tsv(:double, :persistence => true)
+    exon_start   ||= Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Chr Start"], :cast => :to_i)
+    exon_end     ||= Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Chr End"], :cast => :to_i)
+    exon_strand  ||= Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Strand"], :cast => :to_i)
+    exons = exons_at_genomic_positions(org, positions)
+    offsets        = Organism.exon_transcript_offsets(org, exons.flatten.uniq, exon_offsets, exon_info)
+    position_exons = {}
+    positions.zip(exons).each do |position,pos_exons| position_exons[position] = pos_exons end
+    position_offsets = {}
+    position_exons.each do |position,pos_exons|
+      chr, pos = position
+      next if pos_exons.nil? or pos_exons.empty?
+      pos_exons.each do |exon|
+        if offsets.include? exon
+          if exon_strand[exon] == 1
+            offset_in_exon = (pos.to_i - exon_start[exon].to_i)
+          else
+            offset_in_exon = (exon_end[exon] - pos.to_i)
+          end
+          position_offsets[position] ||= {}
+          offsets[exon].each do |transcript, offset|
+            if not offset.nil?
+              position_offsets[position][transcript] = [offset  + offset_in_exon, exon_strand[exon]]
+            end
+          end
+        end
+      end
+    end
+    position_offsets
+  end
+  task_option :org, "Organism", :string
+  task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
+  task :genomic_mutation_to_protein_mutation => :tsv do |org, genomic_mutations|
+    positions = genomic_mutations.keys.collect{|l| l.split(":")}
+    step(:prepare, "Prepare Results")
+    results = TSV.new({})
+    results.key_field = "Position"
+    results.fields = ["Ensembl Transcript ID", "Mutation"]
+    results.type = :double
+    step(:resources, "Load Resources")
+    transcript_sequence = Organism.transcript_sequence(org).tsv(:single, :persistence => true)
+    transcript_5utr     = Organism.transcript_5utr(org).tsv(:single, :persistence => true, :cast => 'to_i')
+    exon_offsets        = Organism.exon_offsets(org).tsv(:double, :persistence => true)
+    exon_start          = Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Chr Start"], :cast => :to_i)
+    exon_end            = Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Chr End"], :cast => :to_i)
+    exon_strand         = Organism.exons(org).tsv(:single, :persistence => true, :fields => ["Exon Strand"], :cast => :to_i)
+    step(:offsets, "Find transcripts and offsets for mutations")
+    offsets = Organism.genomic_position_transcript_offsets(org, positions, exon_offsets, exon_start, exon_end, exon_strand)
+    step(:aminoacid, "Translate mutation to amino acid substitutions")
+    offsets.each do |position, transcripts|
+      alleles = genomic_mutations[position * ":"].collect{|allele| Misc.IUPAC_to_base(allele)}.flatten
+      transcripts.each do |transcript, offset_info|
+        offset, strand = offset_info
+        ddd strand
+        begin
+          codon = Organism.codon_at_transcript_position(org, transcript, offset, transcript_sequence, transcript_5utr)
+        rescue
+          Log.medium $!.message
+          next
+        end
+        ddd codon
+        if not codon.nil?
+          alleles.each do |allele|
+            ddd allele
+            allele = Misc::BASE2COMPLEMENT[allele] if strand == -1
+            ddd allele
+            change = Organism.codon_change(allele, *codon.values_at(0,1))
+            pos_code = position * ":"
+            mutation = [change.first, codon.last + 1, change.last] * ""
+            if results.include? pos_code
+              results[pos_code] = results[pos_code].merge [transcript, mutation]
+            else
+              results[pos_code] = [[transcript], [mutation]]
+            end
+          end
+        end
+      end
+    end
+    results
+  end
+end
+if __FILE__ == $0
+  require 'rbbt/util/log'
+  require 'benchmark'
+  select = <<-EOF
+3:64581875
+  EOF
+  select = select.split("\n").collect{|l| l.split(":")}
+  picmi_test = <<-EOF
+#Chromosome	Name	Position	Reference	Tumor
+1	100382265	C	G
+1	100380997	A	G
+22	30163533	A	C
+X	10094215	G	A
+X	10085674	C	T
+20	50071099	G	T
+21	19638426	G	T
+2	230633386	C	T
+2	230312220	C	T
+1	100624830	T	A
+4	30723053	G	T
+  EOF
+  # Build 37
+  picmi_test = <<-EOF
+#Chromosome	Name	Position	Reference	Tumor
+1	100624830	T	A
+21 19638426 G T
+  EOF
+#  # Build 36
+#  picmi_test = <<-EOF
+##Chromosome	Name	Position	Reference	Tumor
+#3 81780820 T C
+#2 43881517 A T
+#2 43857514 T C
+#6 88375602 G A
+#16 69875502 G T
+#16 69876078 T C
+#16 69877147 G A
+#17 8101874 C T
+#  EOF
+  Log.severity = 2
+  org = 'Hsa/may2009'
+  file = File.join(ENV["HOME"], 'git/rbbt-util/integration_test/data/Metastasis.tsv')
+  #positions = TSV.new(StringIO.new(picmi_test), :list, :sep => /\s+/, :fix => Proc.new{|l| l.sub(/\s+/,':')})
+  positions = TSV.new(file, :list, :fix => Proc.new{|l| l.sub(/\t/,':')})
+  positions.key_field = "Position"
+  positions.fields = %w(Reference Control Tumor)
+  #positions.fields = %w(Reference Tumor)
+  #puts positions.slice(["Reference", "Tumor"]).to_s.split(/\n/).collect{|line| next if line =~ /#/; parts = line.split(/\t|:/); parts[3] = Misc.IUPAC_to_base(parts[3]).first; parts * ","}.compact * "\n"
+  #positions =  positions.select ["10:98099540"]
+  Organism.basedir = Rbbt.tmp.organism.sequence.jobs.find :user
+  job =  Organism.job :genomic_mutation_to_protein_mutation, "Metastasis", org, positions.slice("Tumor")
+  job.run
+  while not job.done?
+    puts job.step
+    sleep 2
+  end
+  raise job.messages.last if job.error?
+  mutations = job.load
+end