rbbt-sources 3.0.8 → 3.0.9
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da001935348263d932ee50674a7988587cac6a9
|
4
|
+
data.tar.gz: cd1fd8a320c3d8ada473b0faf6ca6d8960f7646d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d43e22521331578c7d0e078c4b666e18b8af4666de5e389b4c99d98a80f924b59257b0838f2785f04aa6d4e92ce10d2f29faac22dcd8dceb8dc33d9619bc2fb
|
7
|
+
data.tar.gz: 779b4e1866adb97fe2b4ee1f6b61728087aa8a5212ed789b89d1bf9cd6a57970c74b499c01b3aa8993613a123189833ad37a1411f78f04df1957340534cd2bcb
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -35,15 +35,15 @@ module BioMart
|
|
35
35
|
if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
|
36
36
|
raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
|
37
37
|
end
|
38
|
-
|
39
|
-
|
40
|
-
Log.debug "Using Archive URL #{
|
38
|
+
Thread.current['archive'] = date
|
39
|
+
Thread.current['archive_url'] = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
40
|
+
Log.debug "Using Archive URL #{ Thread.current['archive_url'] }"
|
41
41
|
end
|
42
42
|
|
43
43
|
def self.unset_archive
|
44
44
|
Log.debug "Restoring current version URL #{BIOMART_URL}"
|
45
|
-
|
46
|
-
|
45
|
+
Thread.current['archive'] = nil
|
46
|
+
Thread.current['archive_url'] = nil
|
47
47
|
end
|
48
48
|
|
49
49
|
def self.with_archive(data)
|
@@ -79,7 +79,7 @@ module BioMart
|
|
79
79
|
query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
|
80
80
|
query.sub!(/<!--ATTRIBUTES-->/, attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n") )
|
81
81
|
|
82
|
-
url =
|
82
|
+
url = Thread.current['archive_url'] ? Thread.current['archive_url'] + query.gsub(/\n/,' ') : BIOMART_URL + query.gsub(/\n/,' ')
|
83
83
|
|
84
84
|
begin
|
85
85
|
response = Open.read(url, open_options.dup)
|
@@ -191,8 +191,8 @@ module BioMart
|
|
191
191
|
def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
192
192
|
attrs ||= []
|
193
193
|
|
194
|
-
if
|
195
|
-
attrs = attrs.reject{|attr| (MISSING_IN_ARCHIVE[
|
194
|
+
if Thread.current['archive_url']
|
195
|
+
attrs = attrs.reject{|attr| (MISSING_IN_ARCHIVE[Thread.current['archive']] || []).include? attr[1]}
|
196
196
|
end
|
197
197
|
|
198
198
|
|
data/share/Ensembl/release_dates
CHANGED
@@ -99,5 +99,6 @@ $biomart_go_2009= [
|
|
99
99
|
["GO CC ID", 'go_cellular_component_id'],
|
100
100
|
]
|
101
101
|
|
102
|
-
|
102
|
+
#$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
103
|
+
Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
|
103
104
|
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'net/ftp'
|
2
2
|
require 'rbbt/sources/ensembl_ftp'
|
3
3
|
|
4
|
+
#Thread.current['namespace'] = $namespace
|
4
5
|
|
5
6
|
$biomart_ensembl_gene = ['Ensembl Gene ID', 'ensembl_gene_id']
|
6
7
|
$biomart_ensembl_protein = ['Ensembl Protein ID', 'ensembl_peptide_id']
|
@@ -92,7 +93,7 @@ file 'ortholog_key' do |t|
|
|
92
93
|
end
|
93
94
|
|
94
95
|
file 'identifiers' do |t|
|
95
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace =>
|
96
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
96
97
|
identifiers.unnamed = true
|
97
98
|
|
98
99
|
$biomart_identifiers.each do |name, key, prefix|
|
@@ -150,7 +151,7 @@ end
|
|
150
151
|
|
151
152
|
|
152
153
|
file 'protein_identifiers' do |t|
|
153
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_identifiers, [], nil, :namespace =>
|
154
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
154
155
|
$biomart_protein_identifiers.each do |name, key, prefix|
|
155
156
|
if prefix
|
156
157
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -161,7 +162,7 @@ file 'protein_identifiers' do |t|
|
|
161
162
|
end
|
162
163
|
|
163
164
|
file 'transcript_probes' do |t|
|
164
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace =>
|
165
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
165
166
|
$biomart_probe_identifiers.each do |name, key, prefix|
|
166
167
|
if prefix
|
167
168
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -172,13 +173,13 @@ file 'transcript_probes' do |t|
|
|
172
173
|
end
|
173
174
|
|
174
175
|
file 'gene_transcripts' do |t|
|
175
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace =>
|
176
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => Thread.current['namespace'])
|
176
177
|
|
177
178
|
Misc.sensiblewrite(t.name, transcripts.to_s)
|
178
179
|
end
|
179
180
|
|
180
181
|
file 'transcripts' => 'gene_positions' do |t|
|
181
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace =>
|
182
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => Thread.current['namespace'])
|
182
183
|
transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
183
184
|
|
184
185
|
Misc.sensiblewrite(t.name, transcripts.to_s)
|
@@ -191,7 +192,7 @@ file 'gene_positions' do |t|
|
|
191
192
|
end
|
192
193
|
|
193
194
|
file 'gene_sequence' do |t|
|
194
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace =>
|
195
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => Thread.current['namespace'])
|
195
196
|
|
196
197
|
Misc.sensiblewrite(t.name) do |f|
|
197
198
|
f.puts "#: :type=:single"
|
@@ -208,20 +209,20 @@ file 'gene_sequence' do |t|
|
|
208
209
|
end
|
209
210
|
|
210
211
|
file 'exons' => 'gene_positions' do |t|
|
211
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace =>
|
212
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => Thread.current['namespace'])
|
212
213
|
exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
213
214
|
|
214
215
|
Misc.sensiblewrite(t.name, exons.to_s)
|
215
216
|
end
|
216
217
|
|
217
218
|
file 'transcript_exons' do |t|
|
218
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace =>
|
219
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace => Thread.current['namespace'])
|
219
220
|
|
220
221
|
Misc.sensiblewrite(t.name, exons.to_s)
|
221
222
|
end
|
222
223
|
|
223
224
|
file 'exon_phase' do |t|
|
224
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exon_phase, [], nil, :keep_empty => true, :namespace =>
|
225
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exon_phase, [], nil, :keep_empty => true, :namespace => Thread.current['namespace'])
|
225
226
|
|
226
227
|
Misc.sensiblewrite(t.name, exons.to_s)
|
227
228
|
end
|
@@ -263,11 +264,11 @@ $biomart_variation_id = ["SNP ID", "refsnp_id"]
|
|
263
264
|
$biomart_variation_position = [["Chromosome Name", "chr_name"], ["Chromosome Start", "chrom_start"], ["Variant Alleles", "allele"]]
|
264
265
|
|
265
266
|
file 'germline_variations' do |t|
|
266
|
-
BioMart.tsv($biomart_db_germline_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace =>
|
267
|
+
BioMart.tsv($biomart_db_germline_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => Thread.current['namespace'])
|
267
268
|
end
|
268
269
|
|
269
270
|
file 'somatic_variations' do |t|
|
270
|
-
BioMart.tsv($biomart_db_somatic_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace =>
|
271
|
+
BioMart.tsv($biomart_db_somatic_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => Thread.current['namespace'])
|
271
272
|
end
|
272
273
|
|
273
274
|
|
@@ -275,7 +276,7 @@ end
|
|
275
276
|
|
276
277
|
file 'gene_pmids' do |t|
|
277
278
|
tsv = Entrez.entrez2pubmed($taxs)
|
278
|
-
text = "#: :namespace=#{
|
279
|
+
text = "#: :namespace=#{Thread.current['namespace']}\n"
|
279
280
|
text += "#Entrez Gene ID\tPMID"
|
280
281
|
tsv.each do |gene, pmids|
|
281
282
|
text << "\n" << gene << "\t" << pmids * "|"
|
@@ -322,7 +323,7 @@ file 'exon_offsets' => %w(exons transcript_exons gene_transcripts transcripts tr
|
|
322
323
|
transcript_info = TSV.open('transcripts', :list, :fields => ["Ensembl Protein ID"])
|
323
324
|
transcript_exons = TSV.open('transcript_exons', :double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"])
|
324
325
|
|
325
|
-
string = "#: :namespace=#{
|
326
|
+
string = "#: :namespace=#{Thread.current['namespace']}\n"
|
326
327
|
string += "#Ensembl Exon ID\tEnsembl Transcript ID\tOffset\n"
|
327
328
|
|
328
329
|
exon_transcripts.unnamed = true
|
@@ -353,7 +354,7 @@ end
|
|
353
354
|
|
354
355
|
file 'gene_go' do |t|
|
355
356
|
if File.basename(FileUtils.pwd) =~ /^[a-z]{3}([0-9]{4})$/i and $1.to_i <= 2009
|
356
|
-
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go_2009, [], nil, :type => :double, :namespace =>
|
357
|
+
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go_2009, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
357
358
|
|
358
359
|
goterms.each do |key, values|
|
359
360
|
values.each do |list| list.uniq! end
|
@@ -371,7 +372,7 @@ file 'gene_go' do |t|
|
|
371
372
|
|
372
373
|
Misc.sensiblewrite(t.name, goterms.slice(["GO ID", "GO Namespace"]).to_s)
|
373
374
|
else
|
374
|
-
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace =>
|
375
|
+
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
375
376
|
|
376
377
|
Misc.sensiblewrite(t.name, goterms.to_s)
|
377
378
|
end
|
@@ -425,19 +426,19 @@ end
|
|
425
426
|
|
426
427
|
|
427
428
|
file 'gene_biotype' do |t|
|
428
|
-
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace =>
|
429
|
+
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => Thread.current['namespace'])
|
429
430
|
|
430
431
|
Misc.sensiblewrite(t.name, biotype.to_s)
|
431
432
|
end
|
432
433
|
|
433
434
|
file 'gene_pfam' do |t|
|
434
|
-
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace =>
|
435
|
+
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
435
436
|
|
436
437
|
Misc.sensiblewrite(t.name, pfam.to_s)
|
437
438
|
end
|
438
439
|
|
439
440
|
file 'chromosomes' do |t|
|
440
|
-
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace =>
|
441
|
+
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
441
442
|
|
442
443
|
Misc.sensiblewrite(t.name, goterms.to_s)
|
443
444
|
end
|
@@ -483,7 +484,7 @@ rule /^chromosome_.*/ do |t|
|
|
483
484
|
|
484
485
|
raise "Fasta file for chromosome not found: '#{ chr }' - #{ archive }, #{ release }" if file.nil?
|
485
486
|
|
486
|
-
Log.debug("Downloading chromosome sequence: #{ file }")
|
487
|
+
Log.debug("Downloading chromosome sequence: #{ file } - #{release} #{t.name}")
|
487
488
|
|
488
489
|
Misc.lock t.name + '.rake' do
|
489
490
|
TmpFile.with_file do |tmpfile|
|
@@ -497,13 +498,13 @@ end
|
|
497
498
|
rule /^possible_ortholog_(.*)/ do |t|
|
498
499
|
other = t.name.match(/ortholog_(.*)/)[1]
|
499
500
|
other_key = Organism.ortholog_key(other).produce.read
|
500
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace =>
|
501
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
|
501
502
|
end
|
502
503
|
|
503
504
|
rule /^ortholog_(.*)/ do |t|
|
504
505
|
other = t.name.match(/ortholog_(.*)/)[1]
|
505
506
|
other_key = Organism.ortholog_key(other).produce.read
|
506
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace =>
|
507
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
|
507
508
|
end
|
508
509
|
|
509
510
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -513,15 +514,15 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
|
513
514
|
Misc.in_dir(archive) do
|
514
515
|
BioMart.set_archive archive
|
515
516
|
begin
|
516
|
-
old_namespace =
|
517
|
-
|
517
|
+
old_namespace = Thread.current['namespace']
|
518
|
+
Thread.current['namespace'] = Thread.current['namespace'] + "/" << archive
|
518
519
|
Rake::Task[task].invoke
|
519
520
|
rescue
|
520
521
|
Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
|
521
522
|
Log.exception $!
|
522
523
|
raise $!
|
523
524
|
ensure
|
524
|
-
|
525
|
+
Thread.current['namespace'] = old_namespace
|
525
526
|
end
|
526
527
|
BioMart.unset_archive
|
527
528
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|