RubyGems - rbbt-sources - Versions diffs - 0.1.0 → 0.2.0 - Mend

rbbt-sources 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/lib/rbbt/sources/biomart.rb +34 -12
data/lib/rbbt/sources/entrez.rb +4 -1
data/lib/rbbt/sources/go.rb +37 -38
data/lib/rbbt/sources/organism.rb +7 -1
data/share/install/Organism/Hsa/Rakefile +83 -0
data/share/install/Organism/Sce/Rakefile +118 -0
data/share/install/lib/helpers.rb +47 -0
data/test/rbbt/sources/test_biomart.rb +15 -10
data/test/rbbt/sources/test_entrez.rb +2 -2
data/test/rbbt/sources/test_go.rb +0 -3
data/test/rbbt/sources/test_organism.rb +17 -0
data/test/rbbt/sources/test_pubmed.rb +1 -1
metadata +25 -6

data/lib/rbbt/sources/biomart.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'rbbt'
-require 'rbbt/util/open'
+require 'rbbt-util'
+require 'rbbt/util/log'
 # This module interacts with BioMart. It performs queries to BioMart and
 # synthesises a hash with the results. Note that this module connects to the
@@ -9,6 +9,7 @@ require 'rbbt/util/open'
 module BioMart
   class BioMart::QueryError < StandardError; end
   private
   @@biomart_query_xml = <<-EOT
@@ -25,8 +26,7 @@ module BioMart
-  def self.get(database, main, attrs = nil, filters = nil, data = nil, options = {})
+  def self.get(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
     attrs   ||= []
     filters ||= ["with_#{main}"]
     data    ||= {}
@@ -37,7 +37,7 @@ module BioMart
     query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
     query.sub!(/<!--ATTRIBUTES-->/, attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n") )
-    response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/,' '), options)
+    response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/,' '), open_options)
     if response =~ /Query ERROR:/
       raise BioMart::QueryError, response
     end
@@ -51,8 +51,12 @@ module BioMart
       attrs.each{|name|
         value = parts.shift
         data[main][name] ||= []
-        next if value.nil?
-        data[main][name] << value
+        next if value.nil? or value.empty?
+        if data[main][name]
+          data[main][name] = [value]
+        else
+          data[main][name] << value unless data[main][name].include? value
+        end
       }
     }
@@ -75,30 +79,48 @@ module BioMart
   # the BioMart query to remove results with the main attribute empty, this may
   # cause an error if the BioMart WS does not allow filtering with that
   # attribute.
-  def self.query(database, main, attrs = nil, filters = nil, data = nil, options = {})
+  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
+    open_options = Misc.add_defaults open_options, :nocache => false
     attrs   ||= []
     data    ||= {}
+    Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
+    max_items = 2
     chunks = []
     chunk = []
     attrs.each{|a|
       chunk << a
-      if chunk.length == 2
+      if chunk.length == max_items
         chunks << chunk
         chunk = []
       end
     }
     chunks << chunk if chunk.any?
-    chunks.each{|chunk|
-      data = get(database, main, chunk, filters, data, options)
+    Log.low "Chunks: #{chunks.length}"
+    chunks.each_with_index{|chunk,i|
+      Log.low "Chunk #{ i }: [#{chunk * ", "}]"
+      data = get(database, main, chunk, filters, data, open_options)
     }
     data
   end
+  def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
+    codes = attrs.collect{|attr| attr[1]}
+    data = query(database, main.last, codes, filters, data, open_options)
+    tsv = TSV.new({})
+    data.each do |key, info|
+      tsv[key] = info.values_at(*codes)
+    end
+    tsv.key_field = main.first
+    tsv.fields    = attrs.collect{|attr| attr.first}
+    tsv
+  end
 end

data/lib/rbbt/sources/entrez.rb CHANGED Viewed

@@ -14,7 +14,10 @@ module Entrez
     taxs = [taxs] unless Array === taxs
     options.merge! :grep => taxs
-    TSV.new(Rbbt.find_datafile('gene_info'), options)
+    tsv = TSV.new(Rbbt.find_datafile('gene_info'), options)
+    tsv.key_field = "Entrez Gene ID"
+    tsv.fields    = ["Native ID"]
+    tsv
   end
   def self.entrez2pubmed(taxs)

data/lib/rbbt/sources/go.rb CHANGED Viewed

@@ -4,66 +4,67 @@ require 'rbbt-util'
 # now all it does is provide a translation form id to the actual names.
 module GO
-  @@info = nil
+  Rbbt.add_datafiles :gene_ontology => ['databases/GO', 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo'],
+    :goslim_generic => ['databases/GO', 'http://www.geneontology.org/GO_slims/goslim_generic.obo']
   MULTIPLE_VALUE_FIELDS = %w(is_a)
+  TSV_GENE_ONTOLOGY = File.join(TSV.cachedir, 'gene_ontology')
   # This method needs to be called before any translations can be made, it is
   # called automatically the first time the id2name method is called. It loads
   # the gene_ontology.obo file and extracts all the fields, although right now,
   # only the name field is used.
   def self.init
-    @@info = {}
-    File.open(File.join(Rbbt.datadir, 'dbs/go/gene_ontology.obo')).read.
-      split(/\[Term\]/).
-      each{|term|
+    info = TCHash.new(TSV_GENE_ONTOLOGY, true)
+    File.open(Rbbt.find_datafile('gene_ontology')).read.split(/\[Term\]/).each{|term|
         term_info = {}
-        term.split(/\n/).
-          select{|l| l =~ /:/}.
-          each{|l|
-            key, value = l.chomp.match(/(.*?):(.*)/).values_at(1,2)
-            if MULTIPLE_VALUE_FIELDS.include? key.strip
-              term_info[key.strip] ||= []
-              term_info[key.strip] << value.strip
-            else
-              term_info[key.strip] = value.strip
-            end
-          }
-        @@info[term_info["id"]] = term_info
-    }
+        term.split(/\n/). select{|l| l =~ /:/}.each{|l|
+          key, value = l.chomp.match(/(.*?):(.*)/).values_at(1,2)
+          if MULTIPLE_VALUE_FIELDS.include? key.strip
+            term_info[key.strip] ||= []
+            term_info[key.strip] << value.strip
+          else
+            term_info[key.strip] = value.strip
+          end
+        }
+        next if term_info["id"].nil?
+        info[term_info["id"]] = term_info
+      }
+    info.close
   end
   def self.info
-    self.init unless @@info
-    @@info
+    self.init unless File.exists? TSV_GENE_ONTOLOGY
+    TCHash.get(TSV_GENE_ONTOLOGY)
   end
   def self.goterms
-    self.init unless @@info
-    @@info.keys
+    info.keys
   end
   def self.id2name(id)
-    self.init unless @@info
     if id.kind_of? Array
-      @@info.values_at(*id).collect{|i| i['name'] if i}
+      info.values_at(*id).collect{|i| i['name'] if i}
     else
-      return nil if @@info[id].nil?
-      @@info[id]['name']
+      return nil if info[id].nil?
+      info[id]['name']
     end
   end
   def self.id2ancestors(id)
-    self.init unless @@info
     if id.kind_of? Array
-      @@info.values_at(*id).
+      info.values_at(*id).
         select{|i| ! i['is_a'].nil?}.
         collect{|i| i['is_a'].collect{|id|
-          id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
-        }.compact
+        id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
+      }.compact
       }
     else
-      return [] if @@info[id].nil? || @@info[id]['is_a'].nil?
-      @@info[id]['is_a'].
+      return [] if id.nil? or info[id].nil? or info[id]['is_a'].nil?
+      info[id]['is_a'].
         collect{|id|
         id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
       }.compact
@@ -71,14 +72,12 @@ module GO
   end
   def self.id2namespace(id)
-    self.init unless @@info
+    self.init unless info
     if id.kind_of? Array
-      @@info.values_at(*id).collect{|i| i['namespace'] if i}
+      info.values_at(*id).collect{|i| i['namespace'] if i}
     else
-      return nil if @@info[id].nil?
-      @@info[id]['namespace']
+      return nil if info[id].nil?
+      info[id]['namespace']
     end
   end
 end

data/lib/rbbt/sources/organism.rb CHANGED Viewed

@@ -1,9 +1,15 @@
 require 'rbbt-util'
+require 'rbbt/util/data_module'
 module Organism
   class OrganismNotProcessedError < StandardError; end
   def self.datadir(org)
     File.join(Rbbt.datadir, 'organisms', org)
   end
+  extend DataModule
+  Hsa = with_key('Hsa')
+  Sce = with_key('Sce')
 end

data/share/install/Organism/Hsa/Rakefile ADDED Viewed

@@ -0,0 +1,83 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
+require 'rbbt/sources/biomart'
+require 'rbbt/sources/entrez'
+require File.join(File.dirname(__FILE__), '../../lib/helpers')
+$taxs = [559292,4932]
+$native = "SGD ID"
+$url = "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab"
+$biomart_db = 'hsapiens_gene_ensembl'
+$biomart_main = ['Entrez Gene ID', 'entrezgene']
+$biomart_lexicon = [
+  [ 'Associated Gene Name' , "external_gene_id"],
+  [ 'HGNC symbol', "hgnc_symbol"  ],
+  [ 'HGNC automatic gene name', "hgnc_automatic_gene_name"  ],
+  [ 'HGNC curated gene name ', "hgnc_curated_gene_name"  ],
+]
+$biomart_identifiers = [
+  [ 'Ensembl Gene ID', "ensembl_gene_id"  ],
+  [ 'Ensembl Protein ID', "ensembl_peptide_id"  ],
+  [ 'Associated Gene Name', "external_gene_id"  ],
+  [ 'CCDS ID', "ccds"  ],
+  [ 'Protein ID', "protein_id"  ],
+  [ 'RefSeq Protein ID', "refseq_peptide"  ],
+  [ 'Unigene ID', "unigene"  ],
+  [ 'UniProt/SwissProt ID', "uniprot_swissprot"  ],
+  [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession"  ],
+  [ 'HGNC ID', "hgnc_id", 'HGNC'],
+  [ 'EMBL (Genbank) ID' , "embl"] ,
+  # Affymetrix
+  [ 'AFFY HC G110', 'affy_hc_g110' ],
+  [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
+  [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
+  [ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
+  [ 'AFFY HG U133A', 'affy_hg_u133a' ],
+  [ 'AFFY HG U133B', 'affy_hg_u133b' ],
+  [ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
+  [ 'AFFY HG U95B', 'affy_hg_u95b' ],
+  [ 'AFFY HG U95C', 'affy_hg_u95c' ],
+  [ 'AFFY HG U95D', 'affy_hg_u95d' ],
+  [ 'AFFY HG U95E', 'affy_hg_u95e' ],
+  [ 'AFFY HG U95A', 'affy_hg_u95a' ],
+  [ 'AFFY HUGENEFL', 'affy_hugenefl' ],
+  [ 'AFFY HuEx', 'affy_huex_1_0_st_v2' ],
+  [ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
+  [ 'AFFY U133 X3P', 'affy_u133_x3p' ],
+  [ 'Agilent WholeGenome',"agilent_wholegenome" ],
+  [ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
+  [ 'Codelink ID', 'codelink' ],
+  [ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
+  [ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
+]
+file 'name' do |t|
+  File.open(t.name, 'w') do |f| f.puts "Homo sapiens" end
+end
+file 'lexicon' do |t|
+  lexicon = tsv_file('http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_prev_name&col=gd_aliases&col=gd_name_aliases&col=gd_pub_acc_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag',
+                     "HGNC ID", nil, :flatten => true, :header_hash => '')
+  merge_biomart lexicon, $biomart_db, $biomart_main, $biomart_lexicon, "HGNC ID"
+  File.open(t.name, 'w') do |f| f.puts lexicon end
+end
+file 'identifiers' do |t|
+  identifiers = BioMart.tsv($biomart_db, $biomart_main, $biomart_identifiers)
+  $biomart_identifiers.each do |name, key, prefix|
+    if prefix
+      identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
+    end
+  end
+  File.open(t.name, 'w') do |f| f.puts identifiers end
+end
+task :default => ['name', 'lexicon', 'identifiers']

data/share/install/Organism/Sce/Rakefile ADDED Viewed

@@ -0,0 +1,118 @@
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
+require 'rbbt/sources/biomart'
+require 'rbbt/sources/entrez'
+require File.join(File.dirname(__FILE__), '../../lib/helpers')
+$taxs = [559292,4932]
+$native = "SGD ID"
+$url = "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab"
+$biomart_db = 'scerevisiae_gene_ensembl'
+$biomart_main = ['Entrez Gene ID', 'entrezgene']
+file 'name' do |t|
+  File.open(t.name, 'w') do |f| f.puts "Saccharomyces cerevisiae" end
+end
+file 'lexicon' do |t|
+  lexicon = tsv_file($url, [$native, 0], [3, 4, 5], :keep_empty => true)
+  merge_entrez(lexicon, $taxs, $native, proc{|code| code.sub(/SGD:S0/,'S0') }, proc{|code| code.match(/\tS0/)})
+  merge_biomart(lexicon, $biomart_db, $biomart_main, [['Interpro Description' , "interpro_description"]])
+  lexicon = lexicon.slice(lexicon.fields - ["Entrez Gene ID"])
+  File.open(t.name, 'w') do |f| f.puts lexicon end
+end
+file 'identifiers' do |t|
+  identifiers = tsv_file($url, [$native, 0], [3, 4, 5], :keep_empty => true)
+  merge_entrez(identifiers, $taxs, $native, proc{|code| code.sub(/SGD:S0/,'S0') }, proc{|code| code.match(/\tS0/)})
+  merge_biomart(identifiers, $biomart_db, $biomart_main,
+                [['Associated Gene Name' , "external_gene_id"],
+                  ['Ensembl Gene ID', "ensembl_gene_id"  ],
+                  ['Ensembl Protein ID', "ensembl_peptide_id"  ],
+                  ['RefSeq Protein ID' , "refseq_peptide"] ,
+                  ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
+                  ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
+                  ['Protein ID' , "protein_id"] ,
+                  ['EMBL (Genbank) ID' , "embl"] ,
+                  # Affymetrix
+                  ['Affy yeast 2',"affy_yeast_2"],
+                  ['Affy yg s98', "affy_yg_s98"]])
+  File.open(t.name, 'w') do |f| f.puts identifiers end
+end
+task :default => ['name', 'lexicon', 'identifiers']
+#require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
+#
+#$name = "Saccharomyces cerevisiae"
+#
+#
+#$native_id = "SGD DB Id"
+#
+#$entrez2native = {
+#  :tax => 559292,
+#  :fix => proc{|code| code.sub(/SGD:S0/,'S0') },
+#  :check => proc{|code| code.match(/^S0/)},
+#}
+#
+#$lexicon = {
+#  :file => {
+#    :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
+#    :native => 0,
+#    :extra => [4,3,5]
+#  },
+#  :biomart => {
+#    :database => 'scerevisiae_gene_ensembl',
+#    :main => ['Entrez Gene ID', 'entrezgene'],
+#    :extra => [
+#      ['Interpro Description' , "interpro_description"],
+#    ],
+#    :filter => [],
+#  }
+#
+#}
+#
+#$identifiers = {
+#  :file => {
+#    :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
+#    :native => 0,
+#    :extra => [],
+#  },
+#  :biomart => {
+#    :database => 'scerevisiae_gene_ensembl',
+#    :main => ['Entrez Gene ID', 'entrezgene'],
+#    :extra => [
+#      ['Associated Gene Name' , "external_gene_id"],
+#      ['Ensembl Gene ID', "ensembl_gene_id"  ],
+#      ['Ensembl Protein ID', "ensembl_peptide_id"  ],
+#      ['RefSeq Protein ID' , "refseq_peptide"] ,
+#      ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
+#      ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
+#      ['Protein ID' , "protein_id"] ,
+#      ['EMBL (Genbank) ID' , "embl"] ,
+#      # Affymetrix
+#      ['Affy yeast 2',"affy_yeast_2"],
+#      ['Affy yg s98', "affy_yg_s98"],
+#    ],
+#    :filter => [],
+#  }
+#}
+#
+#$go = {
+#  :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/literature_curation/gene_association.sgd.gz",
+#  :code => 1,
+#  :go   => 4,
+#  :pmid => 5,
+#}
+#
+#$query = '"saccharomyces cerevisiae"[All Fields] AND ((("proteins"[TIAB] NOT Medline[SB]) OR "proteins"[MeSH Terms] OR protein[Text Word]) OR (("genes"[TIAB] NOT Medline[SB]) OR "genes"[MeSH Terms] OR gene[Text Word]))  AND hasabstract[text] AND English[lang]'
+#
+#

data/share/install/lib/helpers.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require 'rbbt-util'
+require 'rbbt/sources/biomart'
+require 'rbbt/sources/entrez'
+def tsv_file(url, native, extra, options = {})
+  options = Misc.add_defaults options, :persistence => false, :keep_empty => true
+  case
+  when Array === native
+    options = Misc.add_defaults options, :native => native.last
+    key_field = native.first
+  when (String === native or Integer === native)
+    options = Misc.add_defaults options, :native => native
+    key_field = nil
+  else
+    key_field = nil
+  end
+  case
+  when (Array === extra and Array === extra.first)
+    options = Misc.add_defaults options, :extra => extra.collect{|e| e.last}
+    fields = extra.collect{|e| e.first}
+  when (Array === extra and not Array === extra.first)
+    options = Misc.add_defaults options, :extra => extra
+    fields = (1..extra.length).to_a.collect{|i| "Field#{i}"}
+  else
+    fields = nil
+  end
+  tsv = TSV.new(Open.open(url), options)
+  tsv.key_field ||= key_field
+  tsv.fields ||= fields
+  tsv
+end
+def merge_entrez(data, taxs, native, fix = nil, select = nil)
+  entrez =  Entrez.entrez2native(taxs, :fix => fix, :select => select)
+  entrez.fields = [native]
+  entrez
+  data.smart_merge entrez, native
+end
+def merge_biomart(lexicon, db, native, other, match = nil)
+  match ||= native.first
+  lexicon.smart_merge BioMart.tsv(db, native, other), match
+end

data/test/rbbt/sources/test_biomart.rb CHANGED Viewed

@@ -9,22 +9,27 @@ class TestBioMart < Test::Unit::TestCase
       BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
     end
-    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => true, :wget_options => { :quiet => false})
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => true, :wget_options => { :quiet => false} )
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    assert(data['856452']['external_gene_id'].include? 'CUP1-2')
+    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :wget_options => { :quiet => false})
+    assert(data['852236']['protein_id'].include? 'CAA84864')
+    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
+    assert(data['852236']['protein_id'].include? 'CAA84864')
+    assert(data['852236']['external_gene_id'].include? 'YBL044W')
   end
   def test_query
-    data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => true, :wget_options => { :quiet => false})
+    data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
+    assert(data['852236']['external_gene_id'].include? 'YBL044W')
+  end
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    assert(data['856452']['external_gene_id'].include? 'CUP1-2')
- end
+  def test_tsv
+    data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
+    assert(data['852236']['Protein ID'].include? 'CAA84864')
+    assert_equal 'Entrez Gene', data.key_field
+    assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
+  end
 end

data/test/rbbt/sources/test_entrez.rb CHANGED Viewed

@@ -3,12 +3,12 @@ require 'rbbt/sources/entrez'
 require 'test/unit'
 class TestEntrez < Test::Unit::TestCase
-  $yeast_tax = 559292
+  $yeast_tax = [559292,4932]
   def test_entrez2native
     tax    = $yeast_tax
     fix    = proc{|line| line.sub(/SGD:S0/,'S0') }
-    select = proc{|line| line.match(/\tSGD:S0/)}
+    select = proc{|line| line.match(/\tS0/)}
     lexicon = Entrez.entrez2native(tax, :fix => fix, :select => select)
     assert(lexicon['855611'].include? 'S000005056')

data/test/rbbt/sources/test_go.rb CHANGED Viewed

@@ -4,7 +4,6 @@ require 'rbbt/sources/go'
 require 'test/unit'
 class TestGo < Test::Unit::TestCase
   def test_go
     assert_match('vacuole inheritance',GO::id2name('GO:0000011'))
     assert_equal(['vacuole inheritance','alpha-glucoside transport'], GO::id2name(['GO:0000011','GO:0000017']))
@@ -17,8 +16,6 @@ class TestGo < Test::Unit::TestCase
   def test_namespace
     assert_equal 'biological_process', GO.id2namespace('GO:0000001')
   end
 end

data/test/rbbt/sources/test_organism.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
+require 'rbbt/sources/organism'
+require 'test/unit'
+class TestEntrez < Test::Unit::TestCase
+  def test_identifiers
+    assert TSV.new(Organism.identifiers('Sce'))['S000006120']["Ensembl Gene ID"].include?('YPL199C')
+    assert TSV.new(Organism::Sce.identifiers)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
+    #assert Organism.identifiers('Hsa')['1020']["Associated Gene Name"].include?('CDK5')
+  end
+  def test_lexicon
+    assert TSV.new(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
+  end
+end

data/test/rbbt/sources/test_pubmed.rb CHANGED Viewed

@@ -21,7 +21,7 @@ class TestPubMed < Test::Unit::TestCase
     assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
   end
-  def test_full_text
+  def _test_full_text
     pmid = '16438716'
     assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
   end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-sources
 version: !ruby/object:Gem::Version
-  hash: 27
+  hash: 23
   prerelease: false
   segments:
   - 0
-  - 1
+  - 2
   - 0
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-12-01 00:00:00 +01:00
+date: 2010-12-10 00:00:00 +01:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -33,7 +33,7 @@ dependencies:
   type: :runtime
   version_requirements: *id001
 - !ruby/object:Gem::Dependency
-  name: mechanize
+  name: rbbt-text
   prerelease: false
   requirement: &id002 !ruby/object:Gem::Requirement
     none: false
@@ -47,7 +47,7 @@ dependencies:
   type: :runtime
   version_requirements: *id002
 - !ruby/object:Gem::Dependency
-  name: libxml-ruby
+  name: mechanize
   prerelease: false
   requirement: &id003 !ruby/object:Gem::Requirement
     none: false
@@ -60,6 +60,20 @@ dependencies:
         version: "0"
   type: :runtime
   version_requirements: *id003
+- !ruby/object:Gem::Dependency
+  name: libxml-ruby
+  prerelease: false
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :runtime
+  version_requirements: *id004
 description: Data sources like PubMed, Entrez Gene, or Gene Ontology
 email: miguel.vazquez@fdi.ucm.es
 executables: []
@@ -76,9 +90,13 @@ files:
 - lib/rbbt/sources/gscholar.rb
 - lib/rbbt/sources/organism.rb
 - lib/rbbt/sources/pubmed.rb
+- share/install/Organism/Hsa/Rakefile
+- share/install/Organism/Sce/Rakefile
+- share/install/lib/helpers.rb
 - test/rbbt/sources/test_biomart.rb
 - test/rbbt/sources/test_entrez.rb
 - test/rbbt/sources/test_go.rb
+- test/rbbt/sources/test_organism.rb
 - test/rbbt/sources/test_pubmed.rb
 - test/test_helper.rb
 has_rdoc: true
@@ -119,5 +137,6 @@ test_files:
 - test/rbbt/sources/test_biomart.rb
 - test/rbbt/sources/test_entrez.rb
 - test/rbbt/sources/test_go.rb
+- test/rbbt/sources/test_organism.rb
 - test/rbbt/sources/test_pubmed.rb
 - test/test_helper.rb