rbbt-sources 3.0.8 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da001935348263d932ee50674a7988587cac6a9
|
4
|
+
data.tar.gz: cd1fd8a320c3d8ada473b0faf6ca6d8960f7646d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d43e22521331578c7d0e078c4b666e18b8af4666de5e389b4c99d98a80f924b59257b0838f2785f04aa6d4e92ce10d2f29faac22dcd8dceb8dc33d9619bc2fb
|
7
|
+
data.tar.gz: 779b4e1866adb97fe2b4ee1f6b61728087aa8a5212ed789b89d1bf9cd6a57970c74b499c01b3aa8993613a123189833ad37a1411f78f04df1957340534cd2bcb
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -35,15 +35,15 @@ module BioMart
|
|
35
35
|
if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
|
36
36
|
raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
|
37
37
|
end
|
38
|
-
|
39
|
-
|
40
|
-
Log.debug "Using Archive URL #{
|
38
|
+
Thread.current['archive'] = date
|
39
|
+
Thread.current['archive_url'] = BIOMART_URL.sub(/http:\/\/biomart\./, 'http://' + date + '.archive.ensembl.')
|
40
|
+
Log.debug "Using Archive URL #{ Thread.current['archive_url'] }"
|
41
41
|
end
|
42
42
|
|
43
43
|
def self.unset_archive
|
44
44
|
Log.debug "Restoring current version URL #{BIOMART_URL}"
|
45
|
-
|
46
|
-
|
45
|
+
Thread.current['archive'] = nil
|
46
|
+
Thread.current['archive_url'] = nil
|
47
47
|
end
|
48
48
|
|
49
49
|
def self.with_archive(data)
|
@@ -79,7 +79,7 @@ module BioMart
|
|
79
79
|
query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
|
80
80
|
query.sub!(/<!--ATTRIBUTES-->/, attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n") )
|
81
81
|
|
82
|
-
url =
|
82
|
+
url = Thread.current['archive_url'] ? Thread.current['archive_url'] + query.gsub(/\n/,' ') : BIOMART_URL + query.gsub(/\n/,' ')
|
83
83
|
|
84
84
|
begin
|
85
85
|
response = Open.read(url, open_options.dup)
|
@@ -191,8 +191,8 @@ module BioMart
|
|
191
191
|
def self.tsv(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
192
192
|
attrs ||= []
|
193
193
|
|
194
|
-
if
|
195
|
-
attrs = attrs.reject{|attr| (MISSING_IN_ARCHIVE[
|
194
|
+
if Thread.current['archive_url']
|
195
|
+
attrs = attrs.reject{|attr| (MISSING_IN_ARCHIVE[Thread.current['archive']] || []).include? attr[1]}
|
196
196
|
end
|
197
197
|
|
198
198
|
|
data/share/Ensembl/release_dates
CHANGED
@@ -99,5 +99,6 @@ $biomart_go_2009= [
|
|
99
99
|
["GO CC ID", 'go_cellular_component_id'],
|
100
100
|
]
|
101
101
|
|
102
|
-
|
102
|
+
#$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
103
|
+
Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
|
103
104
|
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'net/ftp'
|
2
2
|
require 'rbbt/sources/ensembl_ftp'
|
3
3
|
|
4
|
+
#Thread.current['namespace'] = $namespace
|
4
5
|
|
5
6
|
$biomart_ensembl_gene = ['Ensembl Gene ID', 'ensembl_gene_id']
|
6
7
|
$biomart_ensembl_protein = ['Ensembl Protein ID', 'ensembl_peptide_id']
|
@@ -92,7 +93,7 @@ file 'ortholog_key' do |t|
|
|
92
93
|
end
|
93
94
|
|
94
95
|
file 'identifiers' do |t|
|
95
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace =>
|
96
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
96
97
|
identifiers.unnamed = true
|
97
98
|
|
98
99
|
$biomart_identifiers.each do |name, key, prefix|
|
@@ -150,7 +151,7 @@ end
|
|
150
151
|
|
151
152
|
|
152
153
|
file 'protein_identifiers' do |t|
|
153
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_identifiers, [], nil, :namespace =>
|
154
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
154
155
|
$biomart_protein_identifiers.each do |name, key, prefix|
|
155
156
|
if prefix
|
156
157
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -161,7 +162,7 @@ file 'protein_identifiers' do |t|
|
|
161
162
|
end
|
162
163
|
|
163
164
|
file 'transcript_probes' do |t|
|
164
|
-
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace =>
|
165
|
+
identifiers = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_probe_identifiers, [], nil, :namespace => Thread.current['namespace'])
|
165
166
|
$biomart_probe_identifiers.each do |name, key, prefix|
|
166
167
|
if prefix
|
167
168
|
identifiers.process name do |field, key, values| field.each{|v| v.replace "#{prefix}:#{v}"} end
|
@@ -172,13 +173,13 @@ file 'transcript_probes' do |t|
|
|
172
173
|
end
|
173
174
|
|
174
175
|
file 'gene_transcripts' do |t|
|
175
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace =>
|
176
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => Thread.current['namespace'])
|
176
177
|
|
177
178
|
Misc.sensiblewrite(t.name, transcripts.to_s)
|
178
179
|
end
|
179
180
|
|
180
181
|
file 'transcripts' => 'gene_positions' do |t|
|
181
|
-
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace =>
|
182
|
+
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => Thread.current['namespace'])
|
182
183
|
transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
183
184
|
|
184
185
|
Misc.sensiblewrite(t.name, transcripts.to_s)
|
@@ -191,7 +192,7 @@ file 'gene_positions' do |t|
|
|
191
192
|
end
|
192
193
|
|
193
194
|
file 'gene_sequence' do |t|
|
194
|
-
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace =>
|
195
|
+
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => Thread.current['namespace'])
|
195
196
|
|
196
197
|
Misc.sensiblewrite(t.name) do |f|
|
197
198
|
f.puts "#: :type=:single"
|
@@ -208,20 +209,20 @@ file 'gene_sequence' do |t|
|
|
208
209
|
end
|
209
210
|
|
210
211
|
file 'exons' => 'gene_positions' do |t|
|
211
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace =>
|
212
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => Thread.current['namespace'])
|
212
213
|
exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
213
214
|
|
214
215
|
Misc.sensiblewrite(t.name, exons.to_s)
|
215
216
|
end
|
216
217
|
|
217
218
|
file 'transcript_exons' do |t|
|
218
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace =>
|
219
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_exons, [], nil, :keep_empty => true, :namespace => Thread.current['namespace'])
|
219
220
|
|
220
221
|
Misc.sensiblewrite(t.name, exons.to_s)
|
221
222
|
end
|
222
223
|
|
223
224
|
file 'exon_phase' do |t|
|
224
|
-
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exon_phase, [], nil, :keep_empty => true, :namespace =>
|
225
|
+
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exon_phase, [], nil, :keep_empty => true, :namespace => Thread.current['namespace'])
|
225
226
|
|
226
227
|
Misc.sensiblewrite(t.name, exons.to_s)
|
227
228
|
end
|
@@ -263,11 +264,11 @@ $biomart_variation_id = ["SNP ID", "refsnp_id"]
|
|
263
264
|
$biomart_variation_position = [["Chromosome Name", "chr_name"], ["Chromosome Start", "chrom_start"], ["Variant Alleles", "allele"]]
|
264
265
|
|
265
266
|
file 'germline_variations' do |t|
|
266
|
-
BioMart.tsv($biomart_db_germline_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace =>
|
267
|
+
BioMart.tsv($biomart_db_germline_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => Thread.current['namespace'])
|
267
268
|
end
|
268
269
|
|
269
270
|
file 'somatic_variations' do |t|
|
270
|
-
BioMart.tsv($biomart_db_somatic_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace =>
|
271
|
+
BioMart.tsv($biomart_db_somatic_variation, $biomart_variation_id, $biomart_variation_position, [], nil, :keep_empty => true, :type => :list, :filename => t.name, :namespace => Thread.current['namespace'])
|
271
272
|
end
|
272
273
|
|
273
274
|
|
@@ -275,7 +276,7 @@ end
|
|
275
276
|
|
276
277
|
file 'gene_pmids' do |t|
|
277
278
|
tsv = Entrez.entrez2pubmed($taxs)
|
278
|
-
text = "#: :namespace=#{
|
279
|
+
text = "#: :namespace=#{Thread.current['namespace']}\n"
|
279
280
|
text += "#Entrez Gene ID\tPMID"
|
280
281
|
tsv.each do |gene, pmids|
|
281
282
|
text << "\n" << gene << "\t" << pmids * "|"
|
@@ -322,7 +323,7 @@ file 'exon_offsets' => %w(exons transcript_exons gene_transcripts transcripts tr
|
|
322
323
|
transcript_info = TSV.open('transcripts', :list, :fields => ["Ensembl Protein ID"])
|
323
324
|
transcript_exons = TSV.open('transcript_exons', :double, :fields => ["Ensembl Exon ID","Exon Rank in Transcript"])
|
324
325
|
|
325
|
-
string = "#: :namespace=#{
|
326
|
+
string = "#: :namespace=#{Thread.current['namespace']}\n"
|
326
327
|
string += "#Ensembl Exon ID\tEnsembl Transcript ID\tOffset\n"
|
327
328
|
|
328
329
|
exon_transcripts.unnamed = true
|
@@ -353,7 +354,7 @@ end
|
|
353
354
|
|
354
355
|
file 'gene_go' do |t|
|
355
356
|
if File.basename(FileUtils.pwd) =~ /^[a-z]{3}([0-9]{4})$/i and $1.to_i <= 2009
|
356
|
-
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go_2009, [], nil, :type => :double, :namespace =>
|
357
|
+
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go_2009, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
357
358
|
|
358
359
|
goterms.each do |key, values|
|
359
360
|
values.each do |list| list.uniq! end
|
@@ -371,7 +372,7 @@ file 'gene_go' do |t|
|
|
371
372
|
|
372
373
|
Misc.sensiblewrite(t.name, goterms.slice(["GO ID", "GO Namespace"]).to_s)
|
373
374
|
else
|
374
|
-
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace =>
|
375
|
+
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
375
376
|
|
376
377
|
Misc.sensiblewrite(t.name, goterms.to_s)
|
377
378
|
end
|
@@ -425,19 +426,19 @@ end
|
|
425
426
|
|
426
427
|
|
427
428
|
file 'gene_biotype' do |t|
|
428
|
-
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace =>
|
429
|
+
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => Thread.current['namespace'])
|
429
430
|
|
430
431
|
Misc.sensiblewrite(t.name, biotype.to_s)
|
431
432
|
end
|
432
433
|
|
433
434
|
file 'gene_pfam' do |t|
|
434
|
-
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace =>
|
435
|
+
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
435
436
|
|
436
437
|
Misc.sensiblewrite(t.name, pfam.to_s)
|
437
438
|
end
|
438
439
|
|
439
440
|
file 'chromosomes' do |t|
|
440
|
-
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace =>
|
441
|
+
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => Thread.current['namespace'])
|
441
442
|
|
442
443
|
Misc.sensiblewrite(t.name, goterms.to_s)
|
443
444
|
end
|
@@ -483,7 +484,7 @@ rule /^chromosome_.*/ do |t|
|
|
483
484
|
|
484
485
|
raise "Fasta file for chromosome not found: '#{ chr }' - #{ archive }, #{ release }" if file.nil?
|
485
486
|
|
486
|
-
Log.debug("Downloading chromosome sequence: #{ file }")
|
487
|
+
Log.debug("Downloading chromosome sequence: #{ file } - #{release} #{t.name}")
|
487
488
|
|
488
489
|
Misc.lock t.name + '.rake' do
|
489
490
|
TmpFile.with_file do |tmpfile|
|
@@ -497,13 +498,13 @@ end
|
|
497
498
|
rule /^possible_ortholog_(.*)/ do |t|
|
498
499
|
other = t.name.match(/ortholog_(.*)/)[1]
|
499
500
|
other_key = Organism.ortholog_key(other).produce.read
|
500
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace =>
|
501
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
|
501
502
|
end
|
502
503
|
|
503
504
|
rule /^ortholog_(.*)/ do |t|
|
504
505
|
other = t.name.match(/ortholog_(.*)/)[1]
|
505
506
|
other_key = Organism.ortholog_key(other).produce.read
|
506
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace =>
|
507
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
|
507
508
|
end
|
508
509
|
|
509
510
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -513,15 +514,15 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
|
513
514
|
Misc.in_dir(archive) do
|
514
515
|
BioMart.set_archive archive
|
515
516
|
begin
|
516
|
-
old_namespace =
|
517
|
-
|
517
|
+
old_namespace = Thread.current['namespace']
|
518
|
+
Thread.current['namespace'] = Thread.current['namespace'] + "/" << archive
|
518
519
|
Rake::Task[task].invoke
|
519
520
|
rescue
|
520
521
|
Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
|
521
522
|
Log.exception $!
|
522
523
|
raise $!
|
523
524
|
ensure
|
524
|
-
|
525
|
+
Thread.current['namespace'] = old_namespace
|
525
526
|
end
|
526
527
|
BioMart.unset_archive
|
527
528
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|