rbbt-sources 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
1
+ require 'rbbt-util'
2
+ require 'rbbt/util/log'
3
3
 
4
4
  # This module interacts with BioMart. It performs queries to BioMart and
5
5
  # synthesises a hash with the results. Note that this module connects to the
@@ -9,6 +9,7 @@ require 'rbbt/util/open'
9
9
  module BioMart
10
10
 
11
11
  class BioMart::QueryError < StandardError; end
12
+
12
13
  private
13
14
 
14
15
  @@biomart_query_xml = <<-EOT
@@ -25,8 +26,7 @@ module BioMart
25
26
 
26
27
 
27
28
 
28
-
29
- def self.get(database, main, attrs = nil, filters = nil, data = nil, options = {})
29
+ def self.get(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
30
30
  attrs ||= []
31
31
  filters ||= ["with_#{main}"]
32
32
  data ||= {}
@@ -37,7 +37,7 @@ module BioMart
37
37
  query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
38
38
  query.sub!(/<!--ATTRIBUTES-->/, attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n") )
39
39
 
40
- response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/,' '), options)
40
+ response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/,' '), open_options)
41
41
  if response =~ /Query ERROR:/
42
42
  raise BioMart::QueryError, response
43
43
  end
@@ -51,8 +51,12 @@ module BioMart
51
51
  attrs.each{|name|
52
52
  value = parts.shift
53
53
  data[main][name] ||= []
54
- next if value.nil?
55
- data[main][name] << value
54
+ next if value.nil? or value.empty?
55
+ if data[main][name]
56
+ data[main][name] = [value]
57
+ else
58
+ data[main][name] << value unless data[main][name].include? value
59
+ end
56
60
  }
57
61
  }
58
62
 
@@ -75,30 +79,48 @@ module BioMart
75
79
  # the BioMart query to remove results with the main attribute empty, this may
76
80
  # cause an error if the BioMart WS does not allow filtering with that
77
81
  # attribute.
78
- def self.query(database, main, attrs = nil, filters = nil, data = nil, options = {})
82
+ def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
83
+ open_options = Misc.add_defaults open_options, :nocache => false
79
84
  attrs ||= []
80
85
  data ||= {}
81
86
 
87
+ Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
88
+
89
+ max_items = 2
82
90
  chunks = []
83
91
  chunk = []
84
92
  attrs.each{|a|
85
93
  chunk << a
86
- if chunk.length == 2
94
+ if chunk.length == max_items
87
95
  chunks << chunk
88
96
  chunk = []
89
97
  end
90
98
  }
91
99
 
92
100
  chunks << chunk if chunk.any?
101
+
93
102
 
94
- chunks.each{|chunk|
95
- data = get(database, main, chunk, filters, data, options)
103
+ Log.low "Chunks: #{chunks.length}"
104
+ chunks.each_with_index{|chunk,i|
105
+ Log.low "Chunk #{ i }: [#{chunk * ", "}]"
106
+ data = get(database, main, chunk, filters, data, open_options)
96
107
  }
97
108
 
98
109
  data
99
110
  end
100
111
 
101
-
112
+ def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
113
+ codes = attrs.collect{|attr| attr[1]}
114
+ data = query(database, main.last, codes, filters, data, open_options)
115
+ tsv = TSV.new({})
116
+
117
+ data.each do |key, info|
118
+ tsv[key] = info.values_at(*codes)
119
+ end
102
120
 
121
+ tsv.key_field = main.first
122
+ tsv.fields = attrs.collect{|attr| attr.first}
123
+ tsv
124
+ end
103
125
  end
104
126
 
@@ -14,7 +14,10 @@ module Entrez
14
14
  taxs = [taxs] unless Array === taxs
15
15
  options.merge! :grep => taxs
16
16
 
17
- TSV.new(Rbbt.find_datafile('gene_info'), options)
17
+ tsv = TSV.new(Rbbt.find_datafile('gene_info'), options)
18
+ tsv.key_field = "Entrez Gene ID"
19
+ tsv.fields = ["Native ID"]
20
+ tsv
18
21
  end
19
22
 
20
23
  def self.entrez2pubmed(taxs)
@@ -4,66 +4,67 @@ require 'rbbt-util'
4
4
  # now all it does is provide a translation form id to the actual names.
5
5
  module GO
6
6
 
7
- @@info = nil
7
+ Rbbt.add_datafiles :gene_ontology => ['databases/GO', 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo'],
8
+ :goslim_generic => ['databases/GO', 'http://www.geneontology.org/GO_slims/goslim_generic.obo']
9
+
10
+
8
11
  MULTIPLE_VALUE_FIELDS = %w(is_a)
12
+ TSV_GENE_ONTOLOGY = File.join(TSV.cachedir, 'gene_ontology')
9
13
 
10
14
  # This method needs to be called before any translations can be made, it is
11
15
  # called automatically the first time the id2name method is called. It loads
12
16
  # the gene_ontology.obo file and extracts all the fields, although right now,
13
17
  # only the name field is used.
14
18
  def self.init
15
- @@info = {}
16
- File.open(File.join(Rbbt.datadir, 'dbs/go/gene_ontology.obo')).read.
17
- split(/\[Term\]/).
18
- each{|term|
19
+ info = TCHash.new(TSV_GENE_ONTOLOGY, true)
20
+ File.open(Rbbt.find_datafile('gene_ontology')).read.split(/\[Term\]/).each{|term|
19
21
  term_info = {}
20
- term.split(/\n/).
21
- select{|l| l =~ /:/}.
22
- each{|l|
23
- key, value = l.chomp.match(/(.*?):(.*)/).values_at(1,2)
24
- if MULTIPLE_VALUE_FIELDS.include? key.strip
25
- term_info[key.strip] ||= []
26
- term_info[key.strip] << value.strip
27
- else
28
- term_info[key.strip] = value.strip
29
- end
30
- }
31
- @@info[term_info["id"]] = term_info
32
- }
22
+
23
+ term.split(/\n/). select{|l| l =~ /:/}.each{|l|
24
+ key, value = l.chomp.match(/(.*?):(.*)/).values_at(1,2)
25
+ if MULTIPLE_VALUE_FIELDS.include? key.strip
26
+ term_info[key.strip] ||= []
27
+ term_info[key.strip] << value.strip
28
+ else
29
+ term_info[key.strip] = value.strip
30
+ end
31
+ }
32
+
33
+ next if term_info["id"].nil?
34
+ info[term_info["id"]] = term_info
35
+ }
36
+ info.close
33
37
  end
34
38
 
35
39
  def self.info
36
- self.init unless @@info
37
- @@info
40
+ self.init unless File.exists? TSV_GENE_ONTOLOGY
41
+ TCHash.get(TSV_GENE_ONTOLOGY)
38
42
  end
39
43
 
40
44
  def self.goterms
41
- self.init unless @@info
42
- @@info.keys
45
+ info.keys
43
46
  end
44
47
 
45
48
  def self.id2name(id)
46
- self.init unless @@info
47
49
  if id.kind_of? Array
48
- @@info.values_at(*id).collect{|i| i['name'] if i}
50
+ info.values_at(*id).collect{|i| i['name'] if i}
49
51
  else
50
- return nil if @@info[id].nil?
51
- @@info[id]['name']
52
+ return nil if info[id].nil?
53
+ info[id]['name']
52
54
  end
53
55
  end
54
56
 
55
57
  def self.id2ancestors(id)
56
- self.init unless @@info
57
58
  if id.kind_of? Array
58
- @@info.values_at(*id).
59
+ info.values_at(*id).
59
60
  select{|i| ! i['is_a'].nil?}.
60
61
  collect{|i| i['is_a'].collect{|id|
61
- id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
62
- }.compact
62
+ id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
63
+ }.compact
63
64
  }
64
65
  else
65
- return [] if @@info[id].nil? || @@info[id]['is_a'].nil?
66
- @@info[id]['is_a'].
66
+ return [] if id.nil? or info[id].nil? or info[id]['is_a'].nil?
67
+ info[id]['is_a'].
67
68
  collect{|id|
68
69
  id.match(/(GO:\d+)/)[1] if id.match(/(GO:\d+)/)
69
70
  }.compact
@@ -71,14 +72,12 @@ module GO
71
72
  end
72
73
 
73
74
  def self.id2namespace(id)
74
- self.init unless @@info
75
+ self.init unless info
75
76
  if id.kind_of? Array
76
- @@info.values_at(*id).collect{|i| i['namespace'] if i}
77
+ info.values_at(*id).collect{|i| i['namespace'] if i}
77
78
  else
78
- return nil if @@info[id].nil?
79
- @@info[id]['namespace']
79
+ return nil if info[id].nil?
80
+ info[id]['namespace']
80
81
  end
81
82
  end
82
-
83
-
84
83
  end
@@ -1,9 +1,15 @@
1
1
  require 'rbbt-util'
2
+ require 'rbbt/util/data_module'
3
+
2
4
  module Organism
3
5
  class OrganismNotProcessedError < StandardError; end
4
6
 
5
7
  def self.datadir(org)
6
8
  File.join(Rbbt.datadir, 'organisms', org)
7
9
  end
8
-
10
+
11
+ extend DataModule
12
+
13
+ Hsa = with_key('Hsa')
14
+ Sce = with_key('Sce')
9
15
  end
@@ -0,0 +1,83 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
2
+ require 'rbbt/sources/biomart'
3
+ require 'rbbt/sources/entrez'
4
+ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
+
6
+ $taxs = [559292,4932]
7
+ $native = "SGD ID"
8
+ $url = "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab"
9
+ $biomart_db = 'hsapiens_gene_ensembl'
10
+ $biomart_main = ['Entrez Gene ID', 'entrezgene']
11
+ $biomart_lexicon = [
12
+ [ 'Associated Gene Name' , "external_gene_id"],
13
+ [ 'HGNC symbol', "hgnc_symbol" ],
14
+ [ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
15
+ [ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
16
+ ]
17
+
18
+ $biomart_identifiers = [
19
+ [ 'Ensembl Gene ID', "ensembl_gene_id" ],
20
+ [ 'Ensembl Protein ID', "ensembl_peptide_id" ],
21
+ [ 'Associated Gene Name', "external_gene_id" ],
22
+ [ 'CCDS ID', "ccds" ],
23
+ [ 'Protein ID', "protein_id" ],
24
+ [ 'RefSeq Protein ID', "refseq_peptide" ],
25
+ [ 'Unigene ID', "unigene" ],
26
+ [ 'UniProt/SwissProt ID', "uniprot_swissprot" ],
27
+ [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
28
+ [ 'HGNC ID', "hgnc_id", 'HGNC'],
29
+ [ 'EMBL (Genbank) ID' , "embl"] ,
30
+
31
+ # Affymetrix
32
+ [ 'AFFY HC G110', 'affy_hc_g110' ],
33
+ [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
34
+ [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
35
+ [ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
36
+ [ 'AFFY HG U133A', 'affy_hg_u133a' ],
37
+ [ 'AFFY HG U133B', 'affy_hg_u133b' ],
38
+ [ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
39
+ [ 'AFFY HG U95B', 'affy_hg_u95b' ],
40
+ [ 'AFFY HG U95C', 'affy_hg_u95c' ],
41
+ [ 'AFFY HG U95D', 'affy_hg_u95d' ],
42
+ [ 'AFFY HG U95E', 'affy_hg_u95e' ],
43
+ [ 'AFFY HG U95A', 'affy_hg_u95a' ],
44
+ [ 'AFFY HUGENEFL', 'affy_hugenefl' ],
45
+ [ 'AFFY HuEx', 'affy_huex_1_0_st_v2' ],
46
+ [ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
47
+ [ 'AFFY U133 X3P', 'affy_u133_x3p' ],
48
+ [ 'Agilent WholeGenome',"agilent_wholegenome" ],
49
+ [ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
50
+ [ 'Codelink ID', 'codelink' ],
51
+ [ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
52
+ [ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
53
+ ]
54
+
55
+
56
+
57
+ file 'name' do |t|
58
+ File.open(t.name, 'w') do |f| f.puts "Homo sapiens" end
59
+ end
60
+
61
+ file 'lexicon' do |t|
62
+ lexicon = tsv_file('http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_prev_name&col=gd_aliases&col=gd_name_aliases&col=gd_pub_acc_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag',
63
+ "HGNC ID", nil, :flatten => true, :header_hash => '')
64
+ merge_biomart lexicon, $biomart_db, $biomart_main, $biomart_lexicon, "HGNC ID"
65
+
66
+ File.open(t.name, 'w') do |f| f.puts lexicon end
67
+ end
68
+
69
+ file 'identifiers' do |t|
70
+ identifiers = BioMart.tsv($biomart_db, $biomart_main, $biomart_identifiers)
71
+ $biomart_identifiers.each do |name, key, prefix|
72
+ if prefix
73
+ identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
74
+ end
75
+ end
76
+
77
+ File.open(t.name, 'w') do |f| f.puts identifiers end
78
+ end
79
+
80
+
81
+ task :default => ['name', 'lexicon', 'identifiers']
82
+
83
+
@@ -0,0 +1,118 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
2
+ require 'rbbt/sources/biomart'
3
+ require 'rbbt/sources/entrez'
4
+ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
+
6
+ $taxs = [559292,4932]
7
+ $native = "SGD ID"
8
+ $url = "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab"
9
+ $biomart_db = 'scerevisiae_gene_ensembl'
10
+ $biomart_main = ['Entrez Gene ID', 'entrezgene']
11
+
12
+
13
+ file 'name' do |t|
14
+ File.open(t.name, 'w') do |f| f.puts "Saccharomyces cerevisiae" end
15
+ end
16
+
17
+ file 'lexicon' do |t|
18
+ lexicon = tsv_file($url, [$native, 0], [3, 4, 5], :keep_empty => true)
19
+
20
+ merge_entrez(lexicon, $taxs, $native, proc{|code| code.sub(/SGD:S0/,'S0') }, proc{|code| code.match(/\tS0/)})
21
+
22
+ merge_biomart(lexicon, $biomart_db, $biomart_main, [['Interpro Description' , "interpro_description"]])
23
+
24
+ lexicon = lexicon.slice(lexicon.fields - ["Entrez Gene ID"])
25
+
26
+ File.open(t.name, 'w') do |f| f.puts lexicon end
27
+ end
28
+
29
+ file 'identifiers' do |t|
30
+ identifiers = tsv_file($url, [$native, 0], [3, 4, 5], :keep_empty => true)
31
+
32
+ merge_entrez(identifiers, $taxs, $native, proc{|code| code.sub(/SGD:S0/,'S0') }, proc{|code| code.match(/\tS0/)})
33
+
34
+ merge_biomart(identifiers, $biomart_db, $biomart_main,
35
+ [['Associated Gene Name' , "external_gene_id"],
36
+ ['Ensembl Gene ID', "ensembl_gene_id" ],
37
+ ['Ensembl Protein ID', "ensembl_peptide_id" ],
38
+ ['RefSeq Protein ID' , "refseq_peptide"] ,
39
+ ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
40
+ ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
41
+ ['Protein ID' , "protein_id"] ,
42
+ ['EMBL (Genbank) ID' , "embl"] ,
43
+ # Affymetrix
44
+ ['Affy yeast 2',"affy_yeast_2"],
45
+ ['Affy yg s98', "affy_yg_s98"]])
46
+
47
+ File.open(t.name, 'w') do |f| f.puts identifiers end
48
+ end
49
+
50
+
51
+ task :default => ['name', 'lexicon', 'identifiers']
52
+
53
+ #require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
54
+ #
55
+ #$name = "Saccharomyces cerevisiae"
56
+ #
57
+ #
58
+ #$native_id = "SGD DB Id"
59
+ #
60
+ #$entrez2native = {
61
+ # :tax => 559292,
62
+ # :fix => proc{|code| code.sub(/SGD:S0/,'S0') },
63
+ # :check => proc{|code| code.match(/^S0/)},
64
+ #}
65
+ #
66
+ #$lexicon = {
67
+ # :file => {
68
+ # :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
69
+ # :native => 0,
70
+ # :extra => [4,3,5]
71
+ # },
72
+ # :biomart => {
73
+ # :database => 'scerevisiae_gene_ensembl',
74
+ # :main => ['Entrez Gene ID', 'entrezgene'],
75
+ # :extra => [
76
+ # ['Interpro Description' , "interpro_description"],
77
+ # ],
78
+ # :filter => [],
79
+ # }
80
+ #
81
+ #}
82
+ #
83
+ #$identifiers = {
84
+ # :file => {
85
+ # :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/chromosomal_feature/SGD_features.tab",
86
+ # :native => 0,
87
+ # :extra => [],
88
+ # },
89
+ # :biomart => {
90
+ # :database => 'scerevisiae_gene_ensembl',
91
+ # :main => ['Entrez Gene ID', 'entrezgene'],
92
+ # :extra => [
93
+ # ['Associated Gene Name' , "external_gene_id"],
94
+ # ['Ensembl Gene ID', "ensembl_gene_id" ],
95
+ # ['Ensembl Protein ID', "ensembl_peptide_id" ],
96
+ # ['RefSeq Protein ID' , "refseq_peptide"] ,
97
+ # ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
98
+ # ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
99
+ # ['Protein ID' , "protein_id"] ,
100
+ # ['EMBL (Genbank) ID' , "embl"] ,
101
+ # # Affymetrix
102
+ # ['Affy yeast 2',"affy_yeast_2"],
103
+ # ['Affy yg s98', "affy_yg_s98"],
104
+ # ],
105
+ # :filter => [],
106
+ # }
107
+ #}
108
+ #
109
+ #$go = {
110
+ # :url => "ftp://genome-ftp.stanford.edu/pub/yeast/data_download/literature_curation/gene_association.sgd.gz",
111
+ # :code => 1,
112
+ # :go => 4,
113
+ # :pmid => 5,
114
+ #}
115
+ #
116
+ #$query = '"saccharomyces cerevisiae"[All Fields] AND ((("proteins"[TIAB] NOT Medline[SB]) OR "proteins"[MeSH Terms] OR protein[Text Word]) OR (("genes"[TIAB] NOT Medline[SB]) OR "genes"[MeSH Terms] OR gene[Text Word])) AND hasabstract[text] AND English[lang]'
117
+ #
118
+ #
@@ -0,0 +1,47 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/sources/biomart'
3
+ require 'rbbt/sources/entrez'
4
+
5
+ def tsv_file(url, native, extra, options = {})
6
+ options = Misc.add_defaults options, :persistence => false, :keep_empty => true
7
+
8
+ case
9
+ when Array === native
10
+ options = Misc.add_defaults options, :native => native.last
11
+ key_field = native.first
12
+ when (String === native or Integer === native)
13
+ options = Misc.add_defaults options, :native => native
14
+ key_field = nil
15
+ else
16
+ key_field = nil
17
+ end
18
+
19
+ case
20
+ when (Array === extra and Array === extra.first)
21
+ options = Misc.add_defaults options, :extra => extra.collect{|e| e.last}
22
+ fields = extra.collect{|e| e.first}
23
+ when (Array === extra and not Array === extra.first)
24
+ options = Misc.add_defaults options, :extra => extra
25
+ fields = (1..extra.length).to_a.collect{|i| "Field#{i}"}
26
+ else
27
+ fields = nil
28
+ end
29
+
30
+ tsv = TSV.new(Open.open(url), options)
31
+ tsv.key_field ||= key_field
32
+ tsv.fields ||= fields
33
+ tsv
34
+ end
35
+
36
+ def merge_entrez(data, taxs, native, fix = nil, select = nil)
37
+ entrez = Entrez.entrez2native(taxs, :fix => fix, :select => select)
38
+ entrez.fields = [native]
39
+ entrez
40
+
41
+ data.smart_merge entrez, native
42
+ end
43
+
44
+ def merge_biomart(lexicon, db, native, other, match = nil)
45
+ match ||= native.first
46
+ lexicon.smart_merge BioMart.tsv(db, native, other), match
47
+ end
@@ -9,22 +9,27 @@ class TestBioMart < Test::Unit::TestCase
9
9
  BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
10
10
  end
11
11
 
12
- data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => true, :wget_options => { :quiet => false})
13
- assert(data['856452']['protein_id'].include? 'AAB68382')
14
-
15
- data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => true, :wget_options => { :quiet => false} )
16
- assert(data['856452']['protein_id'].include? 'AAB68382')
17
- assert(data['856452']['external_gene_id'].include? 'CUP1-2')
12
+ data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[], nil, :nocache => false, :wget_options => { :quiet => false})
13
+ assert(data['852236']['protein_id'].include? 'CAA84864')
18
14
 
15
+ data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data, :nocache => false, :wget_options => { :quiet => false} )
16
+ assert(data['852236']['protein_id'].include? 'CAA84864')
17
+ assert(data['852236']['external_gene_id'].include? 'YBL044W')
19
18
  end
20
19
 
21
20
  def test_query
22
- data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => true, :wget_options => { :quiet => false})
21
+ data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
22
+
23
+ assert(data['852236']['external_gene_id'].include? 'YBL044W')
24
+ end
23
25
 
24
- assert(data['856452']['protein_id'].include? 'AAB68382')
25
- assert(data['856452']['external_gene_id'].include? 'CUP1-2')
26
- end
26
+ def test_tsv
27
+ data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
27
28
 
29
+ assert(data['852236']['Protein ID'].include? 'CAA84864')
30
+ assert_equal 'Entrez Gene', data.key_field
31
+ assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
32
+ end
28
33
  end
29
34
 
30
35
 
@@ -3,12 +3,12 @@ require 'rbbt/sources/entrez'
3
3
  require 'test/unit'
4
4
 
5
5
  class TestEntrez < Test::Unit::TestCase
6
- $yeast_tax = 559292
6
+ $yeast_tax = [559292,4932]
7
7
 
8
8
  def test_entrez2native
9
9
  tax = $yeast_tax
10
10
  fix = proc{|line| line.sub(/SGD:S0/,'S0') }
11
- select = proc{|line| line.match(/\tSGD:S0/)}
11
+ select = proc{|line| line.match(/\tS0/)}
12
12
  lexicon = Entrez.entrez2native(tax, :fix => fix, :select => select)
13
13
 
14
14
  assert(lexicon['855611'].include? 'S000005056')
@@ -4,7 +4,6 @@ require 'rbbt/sources/go'
4
4
  require 'test/unit'
5
5
 
6
6
  class TestGo < Test::Unit::TestCase
7
-
8
7
  def test_go
9
8
  assert_match('vacuole inheritance',GO::id2name('GO:0000011'))
10
9
  assert_equal(['vacuole inheritance','alpha-glucoside transport'], GO::id2name(['GO:0000011','GO:0000017']))
@@ -17,8 +16,6 @@ class TestGo < Test::Unit::TestCase
17
16
  def test_namespace
18
17
  assert_equal 'biological_process', GO.id2namespace('GO:0000001')
19
18
  end
20
-
21
-
22
19
  end
23
20
 
24
21
 
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/sources/organism'
3
+ require 'test/unit'
4
+
5
+ class TestEntrez < Test::Unit::TestCase
6
+ def test_identifiers
7
+ assert TSV.new(Organism.identifiers('Sce'))['S000006120']["Ensembl Gene ID"].include?('YPL199C')
8
+ assert TSV.new(Organism::Sce.identifiers)['S000006120']["Ensembl Gene ID"].include?('YPL199C')
9
+ #assert Organism.identifiers('Hsa')['1020']["Associated Gene Name"].include?('CDK5')
10
+ end
11
+
12
+ def test_lexicon
13
+ assert TSV.new(Organism.lexicon('Sce'))['S000006120'].flatten.include?('YPL199C')
14
+ end
15
+ end
16
+
17
+
@@ -21,7 +21,7 @@ class TestPubMed < Test::Unit::TestCase
21
21
  assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
22
22
  end
23
23
 
24
- def test_full_text
24
+ def _test_full_text
25
25
  pmid = '16438716'
26
26
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
27
27
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
8
+ - 2
9
9
  - 0
10
- version: 0.1.0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-12-01 00:00:00 +01:00
18
+ date: 2010-12-10 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -33,7 +33,7 @@ dependencies:
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency
36
- name: mechanize
36
+ name: rbbt-text
37
37
  prerelease: false
38
38
  requirement: &id002 !ruby/object:Gem::Requirement
39
39
  none: false
@@ -47,7 +47,7 @@ dependencies:
47
47
  type: :runtime
48
48
  version_requirements: *id002
49
49
  - !ruby/object:Gem::Dependency
50
- name: libxml-ruby
50
+ name: mechanize
51
51
  prerelease: false
52
52
  requirement: &id003 !ruby/object:Gem::Requirement
53
53
  none: false
@@ -60,6 +60,20 @@ dependencies:
60
60
  version: "0"
61
61
  type: :runtime
62
62
  version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: libxml-ruby
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :runtime
76
+ version_requirements: *id004
63
77
  description: Data sources like PubMed, Entrez Gene, or Gene Ontology
64
78
  email: miguel.vazquez@fdi.ucm.es
65
79
  executables: []
@@ -76,9 +90,13 @@ files:
76
90
  - lib/rbbt/sources/gscholar.rb
77
91
  - lib/rbbt/sources/organism.rb
78
92
  - lib/rbbt/sources/pubmed.rb
93
+ - share/install/Organism/Hsa/Rakefile
94
+ - share/install/Organism/Sce/Rakefile
95
+ - share/install/lib/helpers.rb
79
96
  - test/rbbt/sources/test_biomart.rb
80
97
  - test/rbbt/sources/test_entrez.rb
81
98
  - test/rbbt/sources/test_go.rb
99
+ - test/rbbt/sources/test_organism.rb
82
100
  - test/rbbt/sources/test_pubmed.rb
83
101
  - test/test_helper.rb
84
102
  has_rdoc: true
@@ -119,5 +137,6 @@ test_files:
119
137
  - test/rbbt/sources/test_biomart.rb
120
138
  - test/rbbt/sources/test_entrez.rb
121
139
  - test/rbbt/sources/test_go.rb
140
+ - test/rbbt/sources/test_organism.rb
122
141
  - test/rbbt/sources/test_pubmed.rb
123
142
  - test/test_helper.rb