rbbt-sources 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbbt'
|
1
2
|
require 'rbbt/util/tsv'
|
2
3
|
require 'rbbt/util/log'
|
3
4
|
require 'cgi'
|
@@ -13,6 +14,8 @@ module BioMart
|
|
13
14
|
|
14
15
|
BIOMART_URL = 'http://biomart.org/biomart/martservice?query='
|
15
16
|
|
17
|
+
MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.yaml
|
18
|
+
|
16
19
|
private
|
17
20
|
|
18
21
|
@@biomart_query_xml = <<-EOT
|
@@ -28,12 +31,14 @@ module BioMart
|
|
28
31
|
EOT
|
29
32
|
|
30
33
|
def self.set_archive(date)
|
34
|
+
@archive = date
|
31
35
|
@archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
32
36
|
Log.debug "Using Archive URL #{ @archive_url }"
|
33
37
|
end
|
34
38
|
|
35
39
|
def self.unset_archive
|
36
40
|
Log.debug "Restoring current version URL #{BIOMART_URL}"
|
41
|
+
@archive = nil
|
37
42
|
@archive_url = nil
|
38
43
|
end
|
39
44
|
|
@@ -61,15 +66,16 @@ module BioMart
|
|
61
66
|
result_file = TmpFile.tmp_file
|
62
67
|
Open.write(result_file, response)
|
63
68
|
|
69
|
+
new_datafile = TmpFile.tmp_file
|
64
70
|
if data.nil?
|
65
|
-
|
71
|
+
TSV.merge_rows Open.open(result_file), new_datafile
|
72
|
+
data = new_datafile
|
66
73
|
else
|
67
|
-
new_datafile = TmpFile.tmp_file
|
68
74
|
TSV.paste_merge data, result_file, new_datafile
|
69
75
|
FileUtils.rm data
|
70
76
|
data = new_datafile
|
71
|
-
FileUtils.rm result_file
|
72
77
|
end
|
78
|
+
FileUtils.rm result_file
|
73
79
|
|
74
80
|
data
|
75
81
|
end
|
@@ -91,7 +97,8 @@ module BioMart
|
|
91
97
|
# cause an error if the BioMart WS does not allow filtering with that
|
92
98
|
# attribute.
|
93
99
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
94
|
-
open_options = Misc.add_defaults open_options, :nocache => false
|
100
|
+
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil
|
101
|
+
filename, field_names = Misc.process_options open_options, :filename, :field_names
|
95
102
|
attrs ||= []
|
96
103
|
|
97
104
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
@@ -118,22 +125,41 @@ module BioMart
|
|
118
125
|
data = get(database, main, chunk, filters, data, open_options)
|
119
126
|
}
|
120
127
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
+
open_options[:filename] ||= "BioMart: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}"
|
129
|
+
if filename.nil?
|
130
|
+
results = TSV.new data, open_options
|
131
|
+
results.key_field = main
|
132
|
+
results.fields = attrs
|
133
|
+
results
|
134
|
+
else
|
135
|
+
Open.write(filename) do |f|
|
136
|
+
f.puts "#: " << Misc.hash2string(TSV::EXTRA_ACCESSORS.collect{|key| [key, open_options[key]]})
|
137
|
+
if field_names.nil?
|
138
|
+
f.puts "#" << [main, attrs].flatten * "\t"
|
139
|
+
else
|
140
|
+
f.puts "#" << field_names * "\t"
|
141
|
+
end
|
142
|
+
f.write Open.read(data)
|
143
|
+
end
|
144
|
+
FileUtils.rm data
|
145
|
+
filename
|
146
|
+
end
|
128
147
|
end
|
129
148
|
|
130
149
|
def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
131
|
-
|
132
|
-
|
150
|
+
if @archive_url
|
151
|
+
attrs = attrs.reject{|attr| MISSING_IN_ARCHIVE[@archive].include? attr[1]}
|
152
|
+
end
|
133
153
|
|
134
|
-
|
135
|
-
|
136
|
-
|
154
|
+
codes = attrs.collect{|attr| attr[1]}
|
155
|
+
if open_options[:filename].nil?
|
156
|
+
tsv = query(database, main.last, codes, filters, data, open_options)
|
157
|
+
tsv.key_field = main.first
|
158
|
+
tsv.fields = attrs.collect{|attr| attr.first}
|
159
|
+
tsv
|
160
|
+
else
|
161
|
+
query(database, main.last, codes, filters, data, open_options.merge(:field_names => [main.first, attrs.collect{|attr| attr.first}].flatten))
|
162
|
+
end
|
137
163
|
end
|
138
164
|
end
|
139
165
|
|
@@ -29,6 +29,7 @@ module Organism
|
|
29
29
|
options = Misc.add_defaults options, :persistence => true, :case_insensitive => true, :double => false
|
30
30
|
double = Misc.process_options options, :double
|
31
31
|
|
32
|
+
|
32
33
|
options.merge! :target => target unless target.nil?
|
33
34
|
options.merge! :fields => fields unless fields.nil?
|
34
35
|
|
@@ -69,7 +70,7 @@ module Organism
|
|
69
70
|
}.first
|
70
71
|
end
|
71
72
|
|
72
|
-
["Hsa", "Sce"].each do |organism|
|
73
|
+
["Hsa", "Rno", "Sce"].each do |organism|
|
73
74
|
rakefile = Rbbt["share/install/Organism/#{ organism }/Rakefile"]
|
74
75
|
rakefile.lib_dir = Resource.caller_lib_dir __FILE__
|
75
76
|
rakefile.pkgdir = 'phgx'
|
@@ -4,6 +4,8 @@ require 'bio'
|
|
4
4
|
# Sequence analyses
|
5
5
|
module Organism
|
6
6
|
extend WorkFlow
|
7
|
+
relative_to Rbbt, "share/organisms"
|
8
|
+
self.jobdir = Rbbt.var.organism.find
|
7
9
|
|
8
10
|
def self.coding_transcripts_for_exon(org, exon, exon_transcripts, transcript_info)
|
9
11
|
exon_transcripts ||= Organism.transcript_exons(org).tsv(:double, :key => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :merge => true, :persistence => true )
|
@@ -201,16 +203,59 @@ module Organism
|
|
201
203
|
position_offsets
|
202
204
|
end
|
203
205
|
|
204
|
-
task_option :
|
206
|
+
task_option :organism, "Organism", :string, "Hsa"
|
205
207
|
task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
|
206
|
-
|
208
|
+
task_dependencies nil
|
209
|
+
task :genomic_mutations_to_genes => :tsv do |org,genomic_mutations|
|
210
|
+
genomic_mutations = case
|
211
|
+
when TSV === genomic_mutations
|
212
|
+
genomic_mutations
|
213
|
+
else
|
214
|
+
TSV.new StringIO.new(genomic_mutations), :list
|
215
|
+
end
|
216
|
+
genomic_mutations.key_field = "Position"
|
217
|
+
genomic_mutations.fields = ["Mutation"]
|
218
|
+
|
219
|
+
positions = genomic_mutations.keys.collect{|l| l.split(":")}
|
220
|
+
|
221
|
+
step(:resources, "Load Resources")
|
222
|
+
genes_at_positions = Hash[*genomic_mutations.keys.zip(Organism.genes_at_genomic_positions(org, positions)).flatten]
|
223
|
+
|
224
|
+
genomic_mutations.add_field "#{org.sub(/\/.*/,'')}:Ensembl Gene ID" do |position, values|
|
225
|
+
genes_at_positions[position]
|
226
|
+
end
|
227
|
+
|
228
|
+
genomic_mutations
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
task_description <<-EOF
|
233
|
+
Translates a collection of mutations in genomic coordinates into mutations in aminoacids for the
|
234
|
+
protein products of transcripts including those positions.
|
235
|
+
EOF
|
236
|
+
task_option :organism, "Organism", :string, "Hsa"
|
237
|
+
task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
|
238
|
+
task_dependencies nil
|
239
|
+
task :genomic_mutations_to_protein_mutations => :tsv do |org,genomic_mutations|
|
240
|
+
genomic_mutations = case
|
241
|
+
when TSV === genomic_mutations
|
242
|
+
genomic_mutations
|
243
|
+
else
|
244
|
+
TSV.new StringIO.new(genomic_mutations), :list
|
245
|
+
end
|
246
|
+
|
247
|
+
genomic_mutations.key_field = "Position"
|
248
|
+
genomic_mutations.fields = ["Mutation"]
|
249
|
+
|
207
250
|
positions = genomic_mutations.keys.collect{|l| l.split(":")}
|
208
251
|
|
209
252
|
step(:prepare, "Prepare Results")
|
210
253
|
results = TSV.new({})
|
211
254
|
results.key_field = "Position"
|
212
|
-
results.fields = ["Ensembl Transcript ID", "Mutation"]
|
255
|
+
results.fields = ["#{org.sub(/\/.*/,'')}:Ensembl Transcript ID", "Protein Mutation"]
|
213
256
|
results.type = :double
|
257
|
+
results.filename = path
|
258
|
+
|
214
259
|
|
215
260
|
step(:resources, "Load Resources")
|
216
261
|
transcript_sequence = Organism.transcript_sequence(org).tsv(:single, :persistence => true)
|
@@ -229,7 +274,6 @@ module Organism
|
|
229
274
|
|
230
275
|
transcripts.each do |transcript, offset_info|
|
231
276
|
offset, strand = offset_info
|
232
|
-
ddd strand
|
233
277
|
begin
|
234
278
|
codon = Organism.codon_at_transcript_position(org, transcript, offset, transcript_sequence, transcript_5utr)
|
235
279
|
rescue
|
@@ -237,12 +281,9 @@ module Organism
|
|
237
281
|
next
|
238
282
|
end
|
239
283
|
|
240
|
-
ddd codon
|
241
284
|
if not codon.nil?
|
242
285
|
alleles.each do |allele|
|
243
|
-
ddd allele
|
244
286
|
allele = Misc::BASE2COMPLEMENT[allele] if strand == -1
|
245
|
-
ddd allele
|
246
287
|
change = Organism.codon_change(allele, *codon.values_at(0,1))
|
247
288
|
pos_code = position * ":"
|
248
289
|
mutation = [change.first, codon.last + 1, change.last] * ""
|
@@ -323,7 +364,7 @@ X 10085674 C T
|
|
323
364
|
#positions = positions.select ["10:98099540"]
|
324
365
|
|
325
366
|
Organism.basedir = Rbbt.tmp.organism.sequence.jobs.find :user
|
326
|
-
job = Organism.job :
|
367
|
+
job = Organism.job :genomic_mutations_to_protein_mutations, "Metastasis", org, positions.slice("Tumor")
|
327
368
|
job.run
|
328
369
|
|
329
370
|
while not job.done?
|
@@ -0,0 +1,44 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
|
2
|
+
require 'rbbt/sources/biomart'
|
3
|
+
require 'rbbt/sources/entrez'
|
4
|
+
require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
5
|
+
|
6
|
+
$taxs = [10116]
|
7
|
+
$scientific_name = "Rattus norvegicus"
|
8
|
+
|
9
|
+
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
|
+
|
11
|
+
$biomart_lexicon = [
|
12
|
+
[ 'Associated Gene Name' , "external_gene_id"],
|
13
|
+
[ 'HGNC symbol', "hgnc_symbol" ],
|
14
|
+
[ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
|
15
|
+
[ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
|
16
|
+
]
|
17
|
+
|
18
|
+
$biomart_identifiers = [
|
19
|
+
['Associated Gene Name' , "external_gene_id"],
|
20
|
+
['Protein ID' , "protein_id"] ,
|
21
|
+
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
22
|
+
['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
|
23
|
+
['RefSeq Protein ID' , "refseq_peptide"] ,
|
24
|
+
['RefSeq DNA ID' , "refseq_dna"] ,
|
25
|
+
['EMBL (Genbank) ID' , "embl"] ,
|
26
|
+
['RGD ID' , "rgd"] ,
|
27
|
+
['RGD Symbol' , "rgd_symbol"] ,
|
28
|
+
|
29
|
+
['Affy rae230a', "affy_rae230a"],
|
30
|
+
['Affy rae230b', "affy_rae230b"],
|
31
|
+
['Affy RaGene', "affy_ragene_1_0_st_v1"],
|
32
|
+
['Affy rat230 2', "affy_rat230_2"],
|
33
|
+
['Affy RaEx', "affy_raex_1_0_st_v1"],
|
34
|
+
['Affy rg u34a', "affy_rg_u34a"],
|
35
|
+
['Affy rg u34b', "affy_rg_u34b"],
|
36
|
+
['Affy rg u34c', "affy_rg_u34c"],
|
37
|
+
['Affy rn u34', "affy_rn_u34"],
|
38
|
+
['Affy rt u34', "affy_rt_u34"],
|
39
|
+
['Agilent WholeGenome',"agilent_wholegenome" ],
|
40
|
+
['Codelink ID ', "codelink"],
|
41
|
+
]
|
42
|
+
|
43
|
+
$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
44
|
+
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
@@ -103,7 +103,7 @@ file 'scientific_name' do |t|
|
|
103
103
|
end
|
104
104
|
|
105
105
|
file 'identifiers' do |t|
|
106
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [])
|
106
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
|
107
107
|
$biomart_identifiers.each do |name, key, prefix|
|
108
108
|
if prefix
|
109
109
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -114,20 +114,20 @@ file 'identifiers' do |t|
|
|
114
114
|
end
|
115
115
|
|
116
116
|
file 'gene_transcripts' do |t|
|
117
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat)
|
117
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
118
118
|
|
119
119
|
File.open(t.name, 'w') do |f| f.puts transcripts end
|
120
120
|
end
|
121
121
|
|
122
122
|
file 'transcripts' => 'gene_positions' do |t|
|
123
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list)
|
123
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
|
124
124
|
transcripts.attach TSV.new('gene_positions'), "Chromosome Name"
|
125
125
|
|
126
126
|
File.open(t.name, 'w') do |f| f.puts transcripts end
|
127
127
|
end
|
128
128
|
|
129
129
|
file 'transcript_3utr' do |t|
|
130
|
-
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :
|
130
|
+
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
131
131
|
|
132
132
|
File.open(t.name, 'w') do |f|
|
133
133
|
f.puts "#: :type=:single#cast=to_i"
|
@@ -142,7 +142,7 @@ end
|
|
142
142
|
|
143
143
|
|
144
144
|
file 'transcript_5utr' do |t|
|
145
|
-
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :
|
145
|
+
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
146
146
|
|
147
147
|
File.open(t.name, 'w') do |f|
|
148
148
|
f.puts "#: :type=:single#cast=to_i"
|
@@ -162,7 +162,7 @@ file 'gene_positions' do |t|
|
|
162
162
|
end
|
163
163
|
|
164
164
|
file 'gene_sequence' do |t|
|
165
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :
|
165
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
166
166
|
|
167
167
|
File.open(t.name, 'w') do |f|
|
168
168
|
f.puts "#: :type=:single"
|
@@ -179,7 +179,7 @@ file 'gene_sequence' do |t|
|
|
179
179
|
end
|
180
180
|
|
181
181
|
file 'protein_sequence' do |t|
|
182
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :
|
182
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
183
183
|
|
184
184
|
File.open(t.name, 'w') do |f|
|
185
185
|
f.puts "#: :type=:single"
|
@@ -197,20 +197,20 @@ file 'protein_sequence' do |t|
|
|
197
197
|
end
|
198
198
|
|
199
199
|
file 'exons' => 'gene_positions' do |t|
|
200
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list)
|
200
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
|
201
201
|
exons.attach TSV.new('gene_positions'), "Chromosome Name"
|
202
202
|
|
203
203
|
File.open(t.name, 'w') do |f| f.puts exons end
|
204
204
|
end
|
205
205
|
|
206
206
|
file 'transcript_exons' do |t|
|
207
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true)
|
207
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace => $namespace)
|
208
208
|
|
209
209
|
File.open(t.name, 'w') do |f| f.puts exons end
|
210
210
|
end
|
211
211
|
|
212
212
|
file 'transcript_sequence' do |t|
|
213
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :
|
213
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
214
214
|
|
215
215
|
File.open(t.name, 'w') do |f|
|
216
216
|
f.puts "#: :type=:single"
|
@@ -232,28 +232,28 @@ $biomart_variation_filter = ["snptype_filters", "COMPLEX_INDEL,COMPLEX_INDEL&NMD
|
|
232
232
|
$biomart_variation_filter = ["snptype_filters", 'COMPLEX_INDEL&NMD_TRANSCRIPT']
|
233
233
|
|
234
234
|
file 'germline_variations' do |t|
|
235
|
-
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variations, [], nil, :keep_empty => true, :type => :list, :
|
236
|
-
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
235
|
+
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
237
236
|
end
|
238
237
|
|
239
238
|
file 'germline_variation_positions' do |t|
|
240
|
-
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variation_positions, [], nil, :keep_empty => true, :type => :list, :
|
239
|
+
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
241
240
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
242
241
|
end
|
243
242
|
|
244
243
|
file 'somatic_variations' do |t|
|
245
|
-
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variations, [], nil, :keep_empty => true, :type => :list, :
|
244
|
+
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
246
245
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
247
246
|
end
|
248
247
|
|
249
248
|
file 'somatic_variation_positions' do |t|
|
250
|
-
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variation_positions, [], nil, :keep_empty => true, :type => :list, :
|
249
|
+
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
251
250
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
252
251
|
end
|
253
252
|
|
254
253
|
file 'gene_pmids' do |t|
|
255
254
|
tsv = Entrez.entrez2pubmed($taxs)
|
256
|
-
text = "
|
255
|
+
text = "#: :namespace=#{$namespace}"
|
256
|
+
text += "#Entrez Gene ID\tPMID"
|
257
257
|
tsv.each do |gene, pmids|
|
258
258
|
text << "\n" << gene << "\t" << pmids * "|"
|
259
259
|
end
|
@@ -270,7 +270,8 @@ file 'exon_offsets' => %w(exons transcript_exons gene_transcripts transcripts tr
|
|
270
270
|
transcript_exons = TSV.new('transcript_exons', :double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"], :persistence => true )
|
271
271
|
|
272
272
|
|
273
|
-
string = "
|
273
|
+
string = "#: :namespace=#{$namespace}"
|
274
|
+
string += "#Ensembl Exon ID\tEnsembl Transcript ID\tOffset\n"
|
274
275
|
exons.each do |exon, info|
|
275
276
|
gene, start, finish, strand, chr = info
|
276
277
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../../test_helper'
|
2
2
|
require 'rbbt/sources/biomart'
|
3
|
+
require 'rbbt/util/tmpfile'
|
3
4
|
require 'test/unit'
|
4
5
|
|
5
6
|
class TestBioMart < Test::Unit::TestCase
|
@@ -20,16 +21,28 @@ class TestBioMart < Test::Unit::TestCase
|
|
20
21
|
|
21
22
|
def test_query
|
22
23
|
data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
23
|
-
|
24
24
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
25
|
+
|
26
|
+
TmpFile.with_file do |f|
|
27
|
+
filename = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
28
|
+
data = TSV.new Open.open(filename)
|
29
|
+
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
30
|
+
end
|
25
31
|
end
|
26
32
|
|
27
33
|
def test_tsv
|
28
34
|
data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
29
|
-
|
30
35
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
31
36
|
assert_equal 'Entrez Gene', data.key_field
|
32
37
|
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
38
|
+
|
39
|
+
TmpFile.with_file do |f|
|
40
|
+
filename = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
41
|
+
data = TSV.new Open.open(filename, :merge => true)
|
42
|
+
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
43
|
+
assert_equal 'Entrez Gene', data.key_field
|
44
|
+
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
45
|
+
end
|
33
46
|
end
|
34
47
|
end
|
35
48
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-03-
|
18
|
+
date: 2011-03-23 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/sources/polysearch.rb
|
96
96
|
- lib/rbbt/sources/pubmed.rb
|
97
97
|
- share/install/Organism/Hsa/Rakefile
|
98
|
+
- share/install/Organism/Rno/Rakefile
|
98
99
|
- share/install/Organism/Sce/Rakefile
|
99
100
|
- share/install/Organism/organism_helpers.rb
|
100
101
|
- share/install/lib/helpers.rb
|