rbbt-sources 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbbt'
|
1
2
|
require 'rbbt/util/tsv'
|
2
3
|
require 'rbbt/util/log'
|
3
4
|
require 'cgi'
|
@@ -13,6 +14,8 @@ module BioMart
|
|
13
14
|
|
14
15
|
BIOMART_URL = 'http://biomart.org/biomart/martservice?query='
|
15
16
|
|
17
|
+
MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.yaml
|
18
|
+
|
16
19
|
private
|
17
20
|
|
18
21
|
@@biomart_query_xml = <<-EOT
|
@@ -28,12 +31,14 @@ module BioMart
|
|
28
31
|
EOT
|
29
32
|
|
30
33
|
def self.set_archive(date)
|
34
|
+
@archive = date
|
31
35
|
@archive_url = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
32
36
|
Log.debug "Using Archive URL #{ @archive_url }"
|
33
37
|
end
|
34
38
|
|
35
39
|
def self.unset_archive
|
36
40
|
Log.debug "Restoring current version URL #{BIOMART_URL}"
|
41
|
+
@archive = nil
|
37
42
|
@archive_url = nil
|
38
43
|
end
|
39
44
|
|
@@ -61,15 +66,16 @@ module BioMart
|
|
61
66
|
result_file = TmpFile.tmp_file
|
62
67
|
Open.write(result_file, response)
|
63
68
|
|
69
|
+
new_datafile = TmpFile.tmp_file
|
64
70
|
if data.nil?
|
65
|
-
|
71
|
+
TSV.merge_rows Open.open(result_file), new_datafile
|
72
|
+
data = new_datafile
|
66
73
|
else
|
67
|
-
new_datafile = TmpFile.tmp_file
|
68
74
|
TSV.paste_merge data, result_file, new_datafile
|
69
75
|
FileUtils.rm data
|
70
76
|
data = new_datafile
|
71
|
-
FileUtils.rm result_file
|
72
77
|
end
|
78
|
+
FileUtils.rm result_file
|
73
79
|
|
74
80
|
data
|
75
81
|
end
|
@@ -91,7 +97,8 @@ module BioMart
|
|
91
97
|
# cause an error if the BioMart WS does not allow filtering with that
|
92
98
|
# attribute.
|
93
99
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
94
|
-
open_options = Misc.add_defaults open_options, :nocache => false
|
100
|
+
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil
|
101
|
+
filename, field_names = Misc.process_options open_options, :filename, :field_names
|
95
102
|
attrs ||= []
|
96
103
|
|
97
104
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
@@ -118,22 +125,41 @@ module BioMart
|
|
118
125
|
data = get(database, main, chunk, filters, data, open_options)
|
119
126
|
}
|
120
127
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
+
open_options[:filename] ||= "BioMart: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}"
|
129
|
+
if filename.nil?
|
130
|
+
results = TSV.new data, open_options
|
131
|
+
results.key_field = main
|
132
|
+
results.fields = attrs
|
133
|
+
results
|
134
|
+
else
|
135
|
+
Open.write(filename) do |f|
|
136
|
+
f.puts "#: " << Misc.hash2string(TSV::EXTRA_ACCESSORS.collect{|key| [key, open_options[key]]})
|
137
|
+
if field_names.nil?
|
138
|
+
f.puts "#" << [main, attrs].flatten * "\t"
|
139
|
+
else
|
140
|
+
f.puts "#" << field_names * "\t"
|
141
|
+
end
|
142
|
+
f.write Open.read(data)
|
143
|
+
end
|
144
|
+
FileUtils.rm data
|
145
|
+
filename
|
146
|
+
end
|
128
147
|
end
|
129
148
|
|
130
149
|
def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
131
|
-
|
132
|
-
|
150
|
+
if @archive_url
|
151
|
+
attrs = attrs.reject{|attr| MISSING_IN_ARCHIVE[@archive].include? attr[1]}
|
152
|
+
end
|
133
153
|
|
134
|
-
|
135
|
-
|
136
|
-
|
154
|
+
codes = attrs.collect{|attr| attr[1]}
|
155
|
+
if open_options[:filename].nil?
|
156
|
+
tsv = query(database, main.last, codes, filters, data, open_options)
|
157
|
+
tsv.key_field = main.first
|
158
|
+
tsv.fields = attrs.collect{|attr| attr.first}
|
159
|
+
tsv
|
160
|
+
else
|
161
|
+
query(database, main.last, codes, filters, data, open_options.merge(:field_names => [main.first, attrs.collect{|attr| attr.first}].flatten))
|
162
|
+
end
|
137
163
|
end
|
138
164
|
end
|
139
165
|
|
@@ -29,6 +29,7 @@ module Organism
|
|
29
29
|
options = Misc.add_defaults options, :persistence => true, :case_insensitive => true, :double => false
|
30
30
|
double = Misc.process_options options, :double
|
31
31
|
|
32
|
+
|
32
33
|
options.merge! :target => target unless target.nil?
|
33
34
|
options.merge! :fields => fields unless fields.nil?
|
34
35
|
|
@@ -69,7 +70,7 @@ module Organism
|
|
69
70
|
}.first
|
70
71
|
end
|
71
72
|
|
72
|
-
["Hsa", "Sce"].each do |organism|
|
73
|
+
["Hsa", "Rno", "Sce"].each do |organism|
|
73
74
|
rakefile = Rbbt["share/install/Organism/#{ organism }/Rakefile"]
|
74
75
|
rakefile.lib_dir = Resource.caller_lib_dir __FILE__
|
75
76
|
rakefile.pkgdir = 'phgx'
|
@@ -4,6 +4,8 @@ require 'bio'
|
|
4
4
|
# Sequence analyses
|
5
5
|
module Organism
|
6
6
|
extend WorkFlow
|
7
|
+
relative_to Rbbt, "share/organisms"
|
8
|
+
self.jobdir = Rbbt.var.organism.find
|
7
9
|
|
8
10
|
def self.coding_transcripts_for_exon(org, exon, exon_transcripts, transcript_info)
|
9
11
|
exon_transcripts ||= Organism.transcript_exons(org).tsv(:double, :key => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :merge => true, :persistence => true )
|
@@ -201,16 +203,59 @@ module Organism
|
|
201
203
|
position_offsets
|
202
204
|
end
|
203
205
|
|
204
|
-
task_option :
|
206
|
+
task_option :organism, "Organism", :string, "Hsa"
|
205
207
|
task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
|
206
|
-
|
208
|
+
task_dependencies nil
|
209
|
+
task :genomic_mutations_to_genes => :tsv do |org,genomic_mutations|
|
210
|
+
genomic_mutations = case
|
211
|
+
when TSV === genomic_mutations
|
212
|
+
genomic_mutations
|
213
|
+
else
|
214
|
+
TSV.new StringIO.new(genomic_mutations), :list
|
215
|
+
end
|
216
|
+
genomic_mutations.key_field = "Position"
|
217
|
+
genomic_mutations.fields = ["Mutation"]
|
218
|
+
|
219
|
+
positions = genomic_mutations.keys.collect{|l| l.split(":")}
|
220
|
+
|
221
|
+
step(:resources, "Load Resources")
|
222
|
+
genes_at_positions = Hash[*genomic_mutations.keys.zip(Organism.genes_at_genomic_positions(org, positions)).flatten]
|
223
|
+
|
224
|
+
genomic_mutations.add_field "#{org.sub(/\/.*/,'')}:Ensembl Gene ID" do |position, values|
|
225
|
+
genes_at_positions[position]
|
226
|
+
end
|
227
|
+
|
228
|
+
genomic_mutations
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
task_description <<-EOF
|
233
|
+
Translates a collection of mutations in genomic coordinates into mutations in aminoacids for the
|
234
|
+
protein products of transcripts including those positions.
|
235
|
+
EOF
|
236
|
+
task_option :organism, "Organism", :string, "Hsa"
|
237
|
+
task_option :genomic_mutations, "Position (chr:position), Allele", :tsv
|
238
|
+
task_dependencies nil
|
239
|
+
task :genomic_mutations_to_protein_mutations => :tsv do |org,genomic_mutations|
|
240
|
+
genomic_mutations = case
|
241
|
+
when TSV === genomic_mutations
|
242
|
+
genomic_mutations
|
243
|
+
else
|
244
|
+
TSV.new StringIO.new(genomic_mutations), :list
|
245
|
+
end
|
246
|
+
|
247
|
+
genomic_mutations.key_field = "Position"
|
248
|
+
genomic_mutations.fields = ["Mutation"]
|
249
|
+
|
207
250
|
positions = genomic_mutations.keys.collect{|l| l.split(":")}
|
208
251
|
|
209
252
|
step(:prepare, "Prepare Results")
|
210
253
|
results = TSV.new({})
|
211
254
|
results.key_field = "Position"
|
212
|
-
results.fields = ["Ensembl Transcript ID", "Mutation"]
|
255
|
+
results.fields = ["#{org.sub(/\/.*/,'')}:Ensembl Transcript ID", "Protein Mutation"]
|
213
256
|
results.type = :double
|
257
|
+
results.filename = path
|
258
|
+
|
214
259
|
|
215
260
|
step(:resources, "Load Resources")
|
216
261
|
transcript_sequence = Organism.transcript_sequence(org).tsv(:single, :persistence => true)
|
@@ -229,7 +274,6 @@ module Organism
|
|
229
274
|
|
230
275
|
transcripts.each do |transcript, offset_info|
|
231
276
|
offset, strand = offset_info
|
232
|
-
ddd strand
|
233
277
|
begin
|
234
278
|
codon = Organism.codon_at_transcript_position(org, transcript, offset, transcript_sequence, transcript_5utr)
|
235
279
|
rescue
|
@@ -237,12 +281,9 @@ module Organism
|
|
237
281
|
next
|
238
282
|
end
|
239
283
|
|
240
|
-
ddd codon
|
241
284
|
if not codon.nil?
|
242
285
|
alleles.each do |allele|
|
243
|
-
ddd allele
|
244
286
|
allele = Misc::BASE2COMPLEMENT[allele] if strand == -1
|
245
|
-
ddd allele
|
246
287
|
change = Organism.codon_change(allele, *codon.values_at(0,1))
|
247
288
|
pos_code = position * ":"
|
248
289
|
mutation = [change.first, codon.last + 1, change.last] * ""
|
@@ -323,7 +364,7 @@ X 10085674 C T
|
|
323
364
|
#positions = positions.select ["10:98099540"]
|
324
365
|
|
325
366
|
Organism.basedir = Rbbt.tmp.organism.sequence.jobs.find :user
|
326
|
-
job = Organism.job :
|
367
|
+
job = Organism.job :genomic_mutations_to_protein_mutations, "Metastasis", org, positions.slice("Tumor")
|
327
368
|
job.run
|
328
369
|
|
329
370
|
while not job.done?
|
@@ -0,0 +1,44 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__),'..', '..', '..', '..', 'lib'))
|
2
|
+
require 'rbbt/sources/biomart'
|
3
|
+
require 'rbbt/sources/entrez'
|
4
|
+
require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
5
|
+
|
6
|
+
$taxs = [10116]
|
7
|
+
$scientific_name = "Rattus norvegicus"
|
8
|
+
|
9
|
+
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
|
+
|
11
|
+
$biomart_lexicon = [
|
12
|
+
[ 'Associated Gene Name' , "external_gene_id"],
|
13
|
+
[ 'HGNC symbol', "hgnc_symbol" ],
|
14
|
+
[ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
|
15
|
+
[ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
|
16
|
+
]
|
17
|
+
|
18
|
+
$biomart_identifiers = [
|
19
|
+
['Associated Gene Name' , "external_gene_id"],
|
20
|
+
['Protein ID' , "protein_id"] ,
|
21
|
+
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
22
|
+
['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
|
23
|
+
['RefSeq Protein ID' , "refseq_peptide"] ,
|
24
|
+
['RefSeq DNA ID' , "refseq_dna"] ,
|
25
|
+
['EMBL (Genbank) ID' , "embl"] ,
|
26
|
+
['RGD ID' , "rgd"] ,
|
27
|
+
['RGD Symbol' , "rgd_symbol"] ,
|
28
|
+
|
29
|
+
['Affy rae230a', "affy_rae230a"],
|
30
|
+
['Affy rae230b', "affy_rae230b"],
|
31
|
+
['Affy RaGene', "affy_ragene_1_0_st_v1"],
|
32
|
+
['Affy rat230 2', "affy_rat230_2"],
|
33
|
+
['Affy RaEx', "affy_raex_1_0_st_v1"],
|
34
|
+
['Affy rg u34a', "affy_rg_u34a"],
|
35
|
+
['Affy rg u34b', "affy_rg_u34b"],
|
36
|
+
['Affy rg u34c', "affy_rg_u34c"],
|
37
|
+
['Affy rn u34', "affy_rn_u34"],
|
38
|
+
['Affy rt u34', "affy_rt_u34"],
|
39
|
+
['Agilent WholeGenome',"agilent_wholegenome" ],
|
40
|
+
['Codelink ID ', "codelink"],
|
41
|
+
]
|
42
|
+
|
43
|
+
$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
44
|
+
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
@@ -103,7 +103,7 @@ file 'scientific_name' do |t|
|
|
103
103
|
end
|
104
104
|
|
105
105
|
file 'identifiers' do |t|
|
106
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [])
|
106
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => $namespace)
|
107
107
|
$biomart_identifiers.each do |name, key, prefix|
|
108
108
|
if prefix
|
109
109
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -114,20 +114,20 @@ file 'identifiers' do |t|
|
|
114
114
|
end
|
115
115
|
|
116
116
|
file 'gene_transcripts' do |t|
|
117
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat)
|
117
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
118
118
|
|
119
119
|
File.open(t.name, 'w') do |f| f.puts transcripts end
|
120
120
|
end
|
121
121
|
|
122
122
|
file 'transcripts' => 'gene_positions' do |t|
|
123
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list)
|
123
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
|
124
124
|
transcripts.attach TSV.new('gene_positions'), "Chromosome Name"
|
125
125
|
|
126
126
|
File.open(t.name, 'w') do |f| f.puts transcripts end
|
127
127
|
end
|
128
128
|
|
129
129
|
file 'transcript_3utr' do |t|
|
130
|
-
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :
|
130
|
+
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
131
131
|
|
132
132
|
File.open(t.name, 'w') do |f|
|
133
133
|
f.puts "#: :type=:single#cast=to_i"
|
@@ -142,7 +142,7 @@ end
|
|
142
142
|
|
143
143
|
|
144
144
|
file 'transcript_5utr' do |t|
|
145
|
-
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :
|
145
|
+
utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
146
146
|
|
147
147
|
File.open(t.name, 'w') do |f|
|
148
148
|
f.puts "#: :type=:single#cast=to_i"
|
@@ -162,7 +162,7 @@ file 'gene_positions' do |t|
|
|
162
162
|
end
|
163
163
|
|
164
164
|
file 'gene_sequence' do |t|
|
165
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :
|
165
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
166
166
|
|
167
167
|
File.open(t.name, 'w') do |f|
|
168
168
|
f.puts "#: :type=:single"
|
@@ -179,7 +179,7 @@ file 'gene_sequence' do |t|
|
|
179
179
|
end
|
180
180
|
|
181
181
|
file 'protein_sequence' do |t|
|
182
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :
|
182
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
183
183
|
|
184
184
|
File.open(t.name, 'w') do |f|
|
185
185
|
f.puts "#: :type=:single"
|
@@ -197,20 +197,20 @@ file 'protein_sequence' do |t|
|
|
197
197
|
end
|
198
198
|
|
199
199
|
file 'exons' => 'gene_positions' do |t|
|
200
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list)
|
200
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
|
201
201
|
exons.attach TSV.new('gene_positions'), "Chromosome Name"
|
202
202
|
|
203
203
|
File.open(t.name, 'w') do |f| f.puts exons end
|
204
204
|
end
|
205
205
|
|
206
206
|
file 'transcript_exons' do |t|
|
207
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true)
|
207
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace => $namespace)
|
208
208
|
|
209
209
|
File.open(t.name, 'w') do |f| f.puts exons end
|
210
210
|
end
|
211
211
|
|
212
212
|
file 'transcript_sequence' do |t|
|
213
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :
|
213
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
214
214
|
|
215
215
|
File.open(t.name, 'w') do |f|
|
216
216
|
f.puts "#: :type=:single"
|
@@ -232,28 +232,28 @@ $biomart_variation_filter = ["snptype_filters", "COMPLEX_INDEL,COMPLEX_INDEL&NMD
|
|
232
232
|
$biomart_variation_filter = ["snptype_filters", 'COMPLEX_INDEL&NMD_TRANSCRIPT']
|
233
233
|
|
234
234
|
file 'germline_variations' do |t|
|
235
|
-
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variations, [], nil, :keep_empty => true, :type => :list, :
|
236
|
-
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
235
|
+
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
237
236
|
end
|
238
237
|
|
239
238
|
file 'germline_variation_positions' do |t|
|
240
|
-
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variation_positions, [], nil, :keep_empty => true, :type => :list, :
|
239
|
+
variations = BioMart.tsv($biomart_db, $biomart_germline_variation_id, $biomart_germline_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
241
240
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
242
241
|
end
|
243
242
|
|
244
243
|
file 'somatic_variations' do |t|
|
245
|
-
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variations, [], nil, :keep_empty => true, :type => :list, :
|
244
|
+
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variations, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
246
245
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
247
246
|
end
|
248
247
|
|
249
248
|
file 'somatic_variation_positions' do |t|
|
250
|
-
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variation_positions, [], nil, :keep_empty => true, :type => :list, :
|
249
|
+
variations = BioMart.tsv($biomart_db, $biomart_somatic_variation_id, $biomart_somatic_variation_positions, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => $namespace)
|
251
250
|
File.open(t.name, 'w') do |f| f.puts variations.to_s end
|
252
251
|
end
|
253
252
|
|
254
253
|
file 'gene_pmids' do |t|
|
255
254
|
tsv = Entrez.entrez2pubmed($taxs)
|
256
|
-
text = "
|
255
|
+
text = "#: :namespace=#{$namespace}"
|
256
|
+
text += "#Entrez Gene ID\tPMID"
|
257
257
|
tsv.each do |gene, pmids|
|
258
258
|
text << "\n" << gene << "\t" << pmids * "|"
|
259
259
|
end
|
@@ -270,7 +270,8 @@ file 'exon_offsets' => %w(exons transcript_exons gene_transcripts transcripts tr
|
|
270
270
|
transcript_exons = TSV.new('transcript_exons', :double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"], :persistence => true )
|
271
271
|
|
272
272
|
|
273
|
-
string = "
|
273
|
+
string = "#: :namespace=#{$namespace}"
|
274
|
+
string += "#Ensembl Exon ID\tEnsembl Transcript ID\tOffset\n"
|
274
275
|
exons.each do |exon, info|
|
275
276
|
gene, start, finish, strand, chr = info
|
276
277
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../../test_helper'
|
2
2
|
require 'rbbt/sources/biomart'
|
3
|
+
require 'rbbt/util/tmpfile'
|
3
4
|
require 'test/unit'
|
4
5
|
|
5
6
|
class TestBioMart < Test::Unit::TestCase
|
@@ -20,16 +21,28 @@ class TestBioMart < Test::Unit::TestCase
|
|
20
21
|
|
21
22
|
def test_query
|
22
23
|
data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
23
|
-
|
24
24
|
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
25
|
+
|
26
|
+
TmpFile.with_file do |f|
|
27
|
+
filename = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
28
|
+
data = TSV.new Open.open(filename)
|
29
|
+
assert(data['852236']['external_gene_id'].include? 'YBL044W')
|
30
|
+
end
|
25
31
|
end
|
26
32
|
|
27
33
|
def test_tsv
|
28
34
|
data = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false})
|
29
|
-
|
30
35
|
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
31
36
|
assert_equal 'Entrez Gene', data.key_field
|
32
37
|
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
38
|
+
|
39
|
+
TmpFile.with_file do |f|
|
40
|
+
filename = BioMart.tsv('scerevisiae_gene_ensembl',['Entrez Gene', 'entrezgene'], [['Protein ID', 'protein_id'],['RefSeq Peptide','refseq_peptide']], [], nil, :nocache => false, :wget_options => { :quiet => false}, :filename => f)
|
41
|
+
data = TSV.new Open.open(filename, :merge => true)
|
42
|
+
assert(data['852236']['Protein ID'].include? 'CAA84864')
|
43
|
+
assert_equal 'Entrez Gene', data.key_field
|
44
|
+
assert_equal ['Protein ID', 'RefSeq Peptide'], data.fields
|
45
|
+
end
|
33
46
|
end
|
34
47
|
end
|
35
48
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 15
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
8
|
+
- 4
|
9
|
+
- 0
|
10
|
+
version: 0.4.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-03-
|
18
|
+
date: 2011-03-23 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/sources/polysearch.rb
|
96
96
|
- lib/rbbt/sources/pubmed.rb
|
97
97
|
- share/install/Organism/Hsa/Rakefile
|
98
|
+
- share/install/Organism/Rno/Rakefile
|
98
99
|
- share/install/Organism/Sce/Rakefile
|
99
100
|
- share/install/Organism/organism_helpers.rb
|
100
101
|
- share/install/lib/helpers.rb
|