RubyGems - rbbt-sources - Versions diffs - 2.1.7 → 3.0.0 - Mend

rbbt-sources 2.1.7 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/lib/rbbt/sources/kegg.rb +127 -0
data/lib/rbbt/sources/matador.rb +9 -0
data/lib/rbbt/sources/pharmagkb.rb +9 -0
data/lib/rbbt/sources/pina.rb +35 -0
data/lib/rbbt/sources/stitch.rb +9 -0
data/lib/rbbt/sources/string.rb +27 -0
data/share/install/KEGG/Rakefile +114 -0
data/share/install/PharmaGKB/Rakefile +211 -0
data/share/install/Pina/Rakefile +16 -0
data/share/install/STITCH/Rakefile +30 -0
data/share/install/STRING/Rakefile +8 -0
metadata +12 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
-  data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
+  metadata.gz: 00cd4a9602b9ea2637a620b30cd3d48a6d63a9fe
+  data.tar.gz: c282f8c86de5148343e5a83ea524cdc09435b9fb
 SHA512:
-  metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
-  data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
+  metadata.gz: 701a67455ca18d9c705e2c409628cd5463f7449d2ee40ba4d26cce6f203018db21b9c6ee6f233cf8d80e44e28d3ffcfa08e474678b538b8db7cb80c44e5eac5a
+  data.tar.gz: 4bbcf6f222c01c5f3314617ed7c2458b3cebb9d8b3293ac631305ea2c610c935792fe0e5d6a7402f041aab4304e5586a7989f1e2a097b5dc620f0cb7a208250c

data/lib/rbbt/sources/kegg.rb ADDED

@@ -0,0 +1,127 @@
+require 'rbbt'
+require 'rbbt/resource'
+module KEGG # Resource module for KEGG tables, with memoized lookup helpers.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/kegg"
+  KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib) # root is claimed with the :rake method against the KEGG install Rakefile
+  def self.names # Memoized single-value TSV of the "Pathway Name" field from KEGG.pathways.
+    @@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
+  end
+  def self.descriptions # Memoized single-value TSV of the "Pathway Description" field from KEGG.pathways.
+    @@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
+  end
+  def self.index2genes # Memoized flat TSV from KEGG.gene_pathway keyed by "KEGG Pathway ID" with "KEGG Gene ID" values.
+    @@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
+  end
+  def self.index2ens # Memoized index over KEGG.identifiers with no explicit target (presumably the key column — confirm).
+    @@index2ens ||= KEGG.identifiers.index(:persist => true)
+  end
+  def self.index2kegg # Memoized index over KEGG.identifiers targeting "KEGG Gene ID".
+    @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
+  end
+  def self.id2name(id) # Value stored for +id+ in names.
+    names[id]
+  end
+  def self.name2id(name) # IDs whose name starts with +name+, ignoring case; returns [] if anything raises.
+    names.select{|id,n| n.downcase.index(name.downcase) == 0}.collect{|id,n| id} rescue []
+  end
+  def self.description(id) # Value stored for +id+ in descriptions.
+    descriptions[id]
+  end
+end
+if defined? Entity
+  module KeggPathway
+    extend Entity
+    self.format = "KEGG Pathway ID"
+    self.annotation :organism
+    def self.filter(query, field = nil, options = nil, entity = nil)
+      return true if query == entity
+      return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
+      false
+    end
+    property :name => :single2array do
+      return nil if self.nil?
+      name = KEGG.id2name(self)
+      name.sub(/ - Homo.*/,'') unless name.nil?
+    end
+    property :description => :single2array do
+      KEGG.description(self)
+    end
+    property :genes => :array2single do |*args|
+      organism = args.first || self.organism
+      KEGG.index2genes.values_at(*self).
+        each{|gene| gene.organism = organism if gene.respond_to? :organism }
+    end
+  end
+  if defined? Gene and Entity === Gene
+    module Gene # Reopens Gene to add KEGG ID conversion and pathway lookup.
+      self.format = "KEGG Gene ID"
+      def to_kegg # Converts the receiver to "KEGG Gene ID" by way of Ensembl Gene IDs.
+        return self if format == "KEGG Gene ID" # already in the target format
+        if Array === self
+          Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        else
+          Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        end
+      end
+      def from_kegg # Converts a "KEGG Gene ID" receiver to "Ensembl Gene ID"; other formats are returned unchanged.
+        return self unless format == "KEGG Gene ID"
+        if Array === self
+          Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        else
+          Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        end
+      end
+      def self.gene_kegg_pathway_index # Memoized flat TSV from KEGG.gene_pathway keyed by "KEGG Gene ID" with "KEGG Pathway ID" values.
+        @@gene_kegg_pathway_index ||=
+          KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
+      end
+      property :to => :array2single do |new_format| # Format translation that special-cases "KEGG Gene ID" on either side.
+        case
+        when format == new_format
+          self
+        when format == "KEGG Gene ID" # leave KEGG through Ensembl IDs, then run the Translation job
+          ensembl = from_kegg.clean_annotations
+          Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        when new_format == "KEGG Gene ID"
+          to_kegg
+        else # neither side is KEGG: translate directly with the Translation job
+          Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
+        end
+      end
+      property :kegg_pathways => :array2single do # Pathway lists for the receiver's KEGG IDs, cached in @kegg_pathways.
+        @kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
+          each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
+      end
+    end
+  end
+end

data/lib/rbbt/sources/matador.rb ADDED

@@ -0,0 +1,9 @@
+require 'phgx'
+module Matador # Resource module for Matador data.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/matador"
+  Matador.claim Matador.root, :rake, Rbbt.share.install.Matador.Rakefile.find(:lib) # NOTE(review): this diff adds no share/install/Matador/Rakefile — confirm it ships elsewhere
+end

data/lib/rbbt/sources/pharmagkb.rb ADDED

@@ -0,0 +1,9 @@
+require 'phgx'
+module PharmaGKB # Resource module for PharmaGKB data.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/pharmagkb"
+  PharmaGKB.claim PharmaGKB.root, :rake, Rbbt.share.install.PharmaGKB.Rakefile.find(:lib) # root is claimed with the :rake method against the PharmaGKB install Rakefile
+end

data/lib/rbbt/sources/pina.rb ADDED

@@ -0,0 +1,35 @@
+require 'phgx'
+module Pina # Resource module for Pina data.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/pina"
+  Pina.claim Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib) # root is claimed with the :rake method against the Pina install Rakefile
+end
+if defined? Entity and defined? Gene and Entity === Gene # only extends Gene when Entity and Gene are already loaded
+  require 'rbbt/entity/gene'
+  require 'rbbt/entity/interactor'
+  require 'rbbt/sources/PSI_MI'
+  module Gene
+    property :pina_interactors => :array2single do # One list of Pina interactors per gene in the receiver.
+      ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
+      pina        = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
+      int = self.ensembl.collect do |ens|
+        uniprot = ens2uniprot[ens] # UniProt accessions recorded for this Ensembl gene
+        list = pina.values_at(*uniprot).compact.collect do |v|
+          Misc.zip_fields(v).collect do |o, method, articles| # one tuple of (interactor, method, PMID) per interaction
+            Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;"))) # method and PMID values are ";;"-separated
+          end
+        end.flatten.uniq
+        Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
+      end
+      Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray) # outer array holds one interactor list per input gene
+    end
+  end
+end

data/lib/rbbt/sources/stitch.rb ADDED

@@ -0,0 +1,9 @@
+require 'phgx'
+module STITCH # Resource module for STITCH data.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/stitch"
+  STITCH.claim STITCH.root, :rake, Rbbt.share.install.STITCH.Rakefile.find(:lib) # root is claimed with the :rake method against the STITCH install Rakefile
+end

data/lib/rbbt/sources/string.rb ADDED

@@ -0,0 +1,27 @@
+require 'phgx'
+module STRING # Resource module for STRING data.
+  extend Resource
+  self.pkgdir = "phgx"
+  self.subdir = "share/string"
+  STRING.claim STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib) # root is claimed with the :rake method against the STRING install Rakefile
+end
+if defined? Entity and defined? Gene and Entity === Gene # only extends Gene when Entity and Gene are already loaded
+  module Gene
+    property :string_interactors => :array2single do |*args| # Per-gene lists of genes whose proteins interact in STRING above a score threshold.
+      threshold = args.first || 800 # first argument overrides the default cut-off of 800
+      string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
+      all = self.ensembl.collect do |gene|
+        interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq # proteins absent from the table contribute nothing
+        Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq # map interactor proteins back to genes
+      end
+      all.compact.first.annotate all if Annotated === all.compact.first # copy annotations from the first non-nil result onto the outer array
+      all
+    end
+  end
+end

data/share/install/KEGG/Rakefile ADDED

@@ -0,0 +1,114 @@
+require File.join(File.dirname(__FILE__),'../lib/rake_helper')
+define_source_tasks  "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent", # source name => download URL; tasks below depend on 'source/<name>'
+  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
+  "drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
+  "pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
+file :identifiers => 'source/h.sapiens' do |t|
+  pairs = {}
+  entry = nil
+  Open.read(t.prerequisites.first).each do |line|
+    if line =~ /^ENTRY\s+(\d+)/
+      entry = $1
+      next
+    end
+    if line =~ /Ensembl: (ENSG\d+)/
+      pairs[entry] = $1
+    end
+  end
+  Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + pairs.collect{|entry, ens| [ens, "hsa:" + entry] * "\t"} * "\n")
+end
+file :gene_drug => 'source/drugs' do |t|
+  pairs = {}
+  drug = nil
+  Open.read(t.prerequisites.first).
+    scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
+      if line =~ /^ENTRY\s+(\w+)/
+        drug = $1
+        next
+      end
+      if line =~ /TARGET.*?\[HSA:(.*?)\]/
+        genes = $1.split(/\s/)
+        genes.each do |gene|
+          pairs[gene] ||= []
+          pairs[gene] << drug
+        end
+      end
+  end
+  Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
+end
+file :drugs => 'source/drugs' do |t|
+  info = {}
+  drug = nil
+  Open.read(t.prerequisites.first).
+    scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
+      if line =~ /^ENTRY\s+(\w+)/
+        drug = $1
+        next
+      end
+      if line =~ /^NAME(.*)/
+        names = $1.split(/;/)
+        names.each do |name|
+          info[drug] ||= [[],[]]
+          info[drug][0] << name.chomp.strip
+        end
+      end
+      if line =~ /^DBLINKS(.*)/
+        $1.match(/PubChem: (\d*)/)
+        pubchem = $1
+        next unless pubchem
+        info[drug] ||= [[],[]]
+        info[drug][1] << pubchem.chomp.strip
+      end
+  end
+  Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + info.collect{|drug, info| [drug, info.collect{|v| v * "|"} ].flatten * "\t"} * "\n")
+end
+file :pathways => 'source/pathways' do |t| # Builds the :pathways table: pathway ID, name, description and class.
+  descs = {}
+  names = {}
+  klass = {}
+  pathway = nil
+  Open.read(t.prerequisites.first).split(/\n/).each do |line|
+    if line =~ /ENTRY\s+(\w+)/ # an ENTRY line sets the pathway that the following fields belong to
+      pathway = $1.strip
+    end
+    if line =~ /NAME (.*)/ # NOTE(review): unanchored, so any line containing "NAME " matches — confirm
+      names[pathway] = $1.strip
+    end
+    if line =~ /DESCRIPTION (.*)/
+      descs[pathway] = $1.strip
+    end
+    if line =~ /CLASS (.*)/
+      klass[pathway] = $1.strip
+    end
+  end
+  Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n") # only pathways that have a name are written
+end
+process_tsv :gene_pathway, 'hsa_gene_map.tab', # Builds the gene => pathways table from the hsa_gene_map.tab source.
+  :sep2 => ' ' do
+  headers ['KEGG Gene ID', 'KEGG Pathway ID']
+  data do |gene, pathway|
+    "hsa:#{ gene }\t#{pathway.flatten.collect{|name| "hsa" + name} * "|"}" # gene gets the "hsa:" prefix, each pathway the "hsa" prefix; pathways are "|"-joined
+  end
+end
+add_to_defaults [:pathways, :drugs, :gene_drug, :genes] # NOTE(review): no :genes task is defined in the visible part of this Rakefile — confirm the name

data/share/install/PharmaGKB/Rakefile ADDED

@@ -0,0 +1,211 @@
+require File.join(File.dirname(__FILE__),'../lib/rake_helper')
+define_source_tasks "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip", # source name => download URL; tasks below depend on 'source/<name>'
+  "drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
+  "diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
+  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
+  "variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
+  "pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
+process_tsv :diseases, 'diseases', # Builds the :diseases table from the diseases source.
+  :header_hash => "",
+  :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do # turns "," between quoted items into "|", drops remaining quotes, removes a comma right before a tab or line end
+  headers ['PhGKB Disease ID']
+end
+process_tsv :identifiers, 'genes', # Builds the :identifiers table from the genes source.
+  :header_hash => "",
+  :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do # turns "," between quoted items into "|", drops remaining quotes, removes a comma right before a tab or line end
+  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
+end
+process_tsv :drugs, 'drugs', # Builds the :drugs table from four named columns of the drugs source.
+  :header_hash => "",
+  :fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
+  :fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do # turns "," between quoted items into "|", drops remaining quotes, removes a comma right before a tab or line end
+  headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
+end
+process_tsv :relationships, 'relationships', # Builds the :relationships table: a pair key plus PMIDs, pathway IDs and RSIDs.
+  :header_hash => "",
+  :merge => true,
+  :fix => proc{|l|
+    l.gsub!(/Gene:|Drug:|Disease:/,'') # strips the entity-type prefixes, mutating the line in place
+    parts = l.split("\t")
+    rels = parts.pop # last column holds the comma-separated evidence list
+    parts = [parts.values_at(0, 2) * ":"] # row key is columns 0 and 2 joined with ":"
+    pmids = []
+    pathways = []
+    rsids = []
+    rels.split(',').each do |r| # sort each evidence item by its prefix; other prefixes are ignored
+      case
+      when r =~ /PMID:(.*)/
+        pmids << $1
+      when r =~ /Pathway:(.*)/
+        pathways << $1
+      when r =~ /RSID:(.*)/
+        rsids << $1
+      end
+    end
+    parts << pmids * "|"
+    parts << pathways * "|"
+    parts << rsids * "|"
+    parts * "\t" # rebuilt line: key, PMIDs, pathways, RSIDs
+  },
+  :keep_empty => true do
+  headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
+end
+process_tsv :gene_drug, 'relationships', # Builds the :gene_drug table from relationship lines that start with a gene and mention a drug.
+  :select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
+  :header_hash => "",
+  :merge => true,
+  :fix => proc{|l|
+    l.gsub!(/Gene:|Drug:|Disease:/,'') # strips the entity-type prefixes, mutating the line in place
+    parts = l.split("\t")
+    rels = parts.pop # drops the last column; the rels value itself is not used afterwards
+    parts = parts.values_at 0, 2 # keep columns 0 and 2 only
+    parts * "\t"
+  },
+  :keep_empty => true do
+  headers ['PhGKB Gene ID', 'PhGKB Drug ID']
+end
+process_tsv :gene_disease, 'relationships',
+  :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
+  :key_field => 1,
+  :fields => 3,
+  :merge => true,
+  :header_hash => "",
+  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
+  :keep_empty => true do
+  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
+end
+process_tsv :variants, 'variants', # Builds the :variants table from selected columns of the variants source.
+  :key_field => 1,
+  :fields => [3,7,8,9,10,4,6,5],
+  :header_hash => "",
+  :merge => true,
+  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')}, # NOTE(review): "Disease" has no colon here, unlike the :relationships fix — confirm which is intended
+  :keep_empty => true do
+  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
+end
+file :pathways => 'source/pathways' do |t| # Builds the :pathways table: pathway ID, name and annotation source.
+  File.open(t.name, 'w') do |f|
+    f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
+    Open.read(t.prerequisites.first).split(/\n/).each do |line|
+      case
+      when line =~ /(PA\d+): (.*) - \((.*)\)/ # "PA<id>: <name> - (<source>)"
+        f.puts [$1,$2,$3] * "\t"
+      when line =~ /(PA\d+): (.*)/ # no parenthesised source: third column is left empty
+        f.puts [$1,$2,""] * "\t"
+      end
+    end
+  end
+end
+file :gene_pathway => 'source/pathways' do |t|
+  pathways = {}
+  last_pathway = nil
+  Open.read(t.prerequisites.first).split(/\n/).each do |line|
+    if line =~ /(P.*):(.*)/
+      last_pathway = $1
+      pathways[last_pathway] = {:name => $2}
+    else
+      type, code, name = line.split(/\t/)
+      next unless type =='Gene'
+      pathways[last_pathway][:genes] ||= []
+      pathways[last_pathway][:genes] << name
+    end
+  end
+end
+file :gene_pathway => 'source/pathways' do |t|
+  pathways = {}
+  last_pathway = nil
+  Open.read(t.prerequisites.first).split(/\n/).each do |line|
+    if line =~ /(P.*):(.*)/
+      last_pathway = $1
+      pathways[last_pathway] = {:name => $2}
+    else
+      type, code, name = line.split(/\t/)
+      next unless type =='Gene'
+      pathways[last_pathway][:genes] ||= []
+      pathways[last_pathway][:genes] << name
+    end
+  end
+  File.open(t.name, 'w') do |f|
+    f.puts "#" + ['PhGKB Pathway ID',  'Pathway Name',  'Associated Gene Name'] * "\t"
+    pathways.each do |pathway, info|
+      next if info[:genes].nil?
+      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
+    end
+  end
+end
+file :pathway_drugs => 'source/pathways' do |t| # Builds the :pathway_drugs table: pathway ID and "|"-joined drug codes.
+  pathways = {}
+  last_pathway = nil
+  Open.read(t.prerequisites.first).split(/\n/).each do |line|
+    if line =~ /(P.*):(.*)/ # header line "<pathway id>:<name>" opens a new pathway
+      last_pathway = $1
+      pathways[last_pathway] = {:name => $2}
+    else
+      type, code, name = line.split(/\t/) # member line: type, code and name separated by tabs
+    next unless type =='Drug'
+      pathways[last_pathway][:drugs] ||= []
+      pathways[last_pathway][:drugs] << code # drugs are recorded by code, not by name
+    end
+  end
+  File.open(t.name, 'w') do |f|
+    f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
+    pathways.each do |pathway, info|
+      next if info[:drugs].nil? # pathways without any drug member are left out
+      f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
+    end
+  end
+end
+file :disease_pathway => 'source/pathways' do |t| # Builds the :disease_pathway table: pathway ID, name and "|"-joined disease names.
+  pathways = {}
+  last_pathway = nil
+  Open.read(t.prerequisites.first).split(/\n/).each do |line|
+    if line =~ /(P.*):(.*)/ # header line "<pathway id>:<name>" opens a new pathway
+      last_pathway = $1
+      pathways[last_pathway] = {:name => $2}
+    else
+      type, code, name = line.split(/\t/) # member line: type, code and name separated by tabs
+    next unless type =='Disease'
+      pathways[last_pathway][:diseases] ||= []
+      pathways[last_pathway][:diseases] << name # diseases are recorded by name
+    end
+  end
+  File.open(t.name, 'w') do |f|
+    f.puts "#" + %w(ID Name Diseases) * "\t"
+    pathways.each do |pathway, info|
+      next if info[:diseases].nil? # pathways without any disease member are left out
+      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:diseases] * "|"}"
+    end
+  end
+end
+# The drug task in this Rakefile is declared as :pathway_drugs; no :drug_pathway task exists here.
+add_to_defaults [:gene_pathway, :pathway_drugs, :disease_pathway]

data/share/install/Pina/Rakefile ADDED

@@ -0,0 +1,16 @@
+require File.join(File.dirname(__FILE__),'../lib/rake_helper')
+define_source_tasks  "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt" # source name => download URL
+process_tsv :protein_protein, 'Homo sapiens-20110628.txt', # Builds the :protein_protein table from columns 0, 1, 6 and 8 of the Pina source.
+  :key         => 0,
+  :fix         => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')}, # drops the "uniprotkb:", "(gene name)" and "pubmed:" markers, turns "|" into ";;", then removes any remaining parenthesised text
+  :fields      => [1,6,8],
+  :header_hash => "#",
+  :merge       => true,
+  :keep_empty  => true do
+  headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
+end

data/share/install/STITCH/Rakefile CHANGED

@@ -0,0 +1,30 @@
+require File.join(File.dirname(__FILE__),'../lib/rake_helper')
+define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz", # source name => download URL
+  "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
+process_tsv :protein_chemical, 'protein_chemicals',
+  :key => 1,
+  :grep => "9606\.",
+  :fix => lambda{|l| l.sub(/9606\./,'')},
+  :keep_empty => true do
+  headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
+end
+$grep_re = [] # shared array: handed to process_tsv as :grep now, filled inside the block below
+process_tsv :chemicals, 'chemicals', # Builds the :chemicals table, grepping the source with the contents of $grep_re.
+  :grep => $grep_re,
+  :key  => 0 do
+  Rake::Task['protein_chemical'].invoke # run the protein_chemical task before reading its output
+  Log.debug "Getting chemicals"
+  chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys # keys taken from column 1 of the protein_chemical file
+  Log.debug "Getting chemicals [done]"
+  $grep_re.replace chemicals # mutates the same array object that was passed as :grep above
+  headers ['STITCH Chemical ID', 'Name', 'Source']
+end

data/share/install/STRING/Rakefile ADDED

@@ -0,0 +1,8 @@
+require File.join(File.dirname(__FILE__),'../lib/rake_helper')
+define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz" # source name => download URL
+process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do # keeps lines matching 9606.ENSP and strips every "9606." from them; "\s" in a double-quoted Ruby string is a single space
+  headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-sources
 version: !ruby/object:Gem::Version
-  version: 2.1.7
+  version: 3.0.0
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-21 00:00:00.000000000 Z
+date: 2014-02-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -105,24 +105,34 @@ files:
 - lib/rbbt/sources/go.rb
 - lib/rbbt/sources/gscholar.rb
 - lib/rbbt/sources/jochem.rb
+- lib/rbbt/sources/kegg.rb
+- lib/rbbt/sources/matador.rb
 - lib/rbbt/sources/organism.rb
 - lib/rbbt/sources/pfam.rb
+- lib/rbbt/sources/pharmagkb.rb
+- lib/rbbt/sources/pina.rb
 - lib/rbbt/sources/polysearch.rb
 - lib/rbbt/sources/pubmed.rb
 - lib/rbbt/sources/reactome.rb
+- lib/rbbt/sources/stitch.rb
+- lib/rbbt/sources/string.rb
 - lib/rbbt/sources/tfacts.rb
 - lib/rbbt/sources/uniprot.rb
 - lib/rbbt/sources/wgEncodeBroadHmm.rb
 - share/Ensembl/release_dates
 - share/install/Genomes1000/Rakefile
 - share/install/JoChem/Rakefile
+- share/install/KEGG/Rakefile
 - share/install/NCI/Rakefile
 - share/install/Organism/Hsa/Rakefile
 - share/install/Organism/Mmu/Rakefile
 - share/install/Organism/Rno/Rakefile
 - share/install/Organism/Sce/Rakefile
 - share/install/Organism/organism_helpers.rb
+- share/install/PharmaGKB/Rakefile
+- share/install/Pina/Rakefile
 - share/install/STITCH/Rakefile
+- share/install/STRING/Rakefile
 - share/install/lib/helpers.rb
 - test/rbbt/sources/test_biomart.rb
 - test/rbbt/sources/test_entrez.rb