rbbt-sources 3.0.4 → 3.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +2 -0
- data/share/install/Organism/organism_helpers.rb +37 -27
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7d726091f839fd461aaaec6e98dcdf9f5f846d5
|
4
|
+
data.tar.gz: d3a32cbf0b7a84d43ab4cb97d91a4c0ac1794fb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40c7bced77cc6fccc91ad1be4cfe9110c5397aec28ed12508ea7ecf084b7f58bdebdd716a0d747fb74a2e28cb039bdf01161531e0c7477589c17d80aca2ad87c
|
7
|
+
data.tar.gz: d620dcf2f64f580eecfe32720d876f5f7eb420d88e25722cd98c23ccb330f8c640ead25b37ee006c7cd0d93f562b657da587e8d6d1b8bf8ce658ae10996e71c9
|
@@ -78,17 +78,17 @@ $biomart_exons = [
|
|
78
78
|
#{{{ Rules
|
79
79
|
|
80
80
|
file 'entrez_taxids' do |t|
|
81
|
-
|
81
|
+
Misc.sensiblewrite(t.name, $taxs * "\n")
|
82
82
|
end
|
83
83
|
|
84
84
|
file 'scientific_name' do |t|
|
85
|
-
|
85
|
+
Misc.sensiblewrite(t.name, $scientific_name)
|
86
86
|
end
|
87
87
|
|
88
88
|
file 'ortholog_key' do |t|
|
89
89
|
raise "Ortholog key not defined. Set up $ortholog_key in the organism specific Rakefile; example $ortholog_key = 'human_ensembl_gene'" unless defined? $ortholog_key and not $ortholog_key.nil?
|
90
90
|
|
91
|
-
|
91
|
+
Misc.sensiblewrite(t.name, $ortholog_key)
|
92
92
|
end
|
93
93
|
|
94
94
|
file 'identifiers' do |t|
|
@@ -134,7 +134,7 @@ file 'identifiers' do |t|
|
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
|
-
|
137
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
138
138
|
end
|
139
139
|
|
140
140
|
file 'lexicon' => 'identifiers' do |t|
|
@@ -157,7 +157,7 @@ file 'protein_identifiers' do |t|
|
|
157
157
|
end
|
158
158
|
end
|
159
159
|
|
160
|
-
|
160
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
161
161
|
end
|
162
162
|
|
163
163
|
file 'transcript_probes' do |t|
|
@@ -168,32 +168,32 @@ file 'transcript_probes' do |t|
|
|
168
168
|
end
|
169
169
|
end
|
170
170
|
|
171
|
-
|
171
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
172
172
|
end
|
173
173
|
|
174
174
|
file 'gene_transcripts' do |t|
|
175
175
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
176
176
|
|
177
|
-
|
177
|
+
Misc.sensiblewrite(t.name, transcripts.to_s)
|
178
178
|
end
|
179
179
|
|
180
180
|
file 'transcripts' => 'gene_positions' do |t|
|
181
181
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
|
182
182
|
transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
183
183
|
|
184
|
-
|
184
|
+
Misc.sensiblewrite(t.name, transcripts.to_s)
|
185
185
|
end
|
186
186
|
|
187
187
|
file 'gene_positions' do |t|
|
188
188
|
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
|
189
189
|
|
190
|
-
|
190
|
+
Misc.sensiblewrite(t.name, sequences.to_s)
|
191
191
|
end
|
192
192
|
|
193
193
|
file 'gene_sequence' do |t|
|
194
194
|
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
195
195
|
|
196
|
-
|
196
|
+
Misc.sensiblewrite(t.name) do |f|
|
197
197
|
f.puts "#: :type=:single"
|
198
198
|
f.puts "#Ensembl Gene ID\tGene Sequence"
|
199
199
|
sequences.each do |seq, genes|
|
@@ -211,7 +211,7 @@ file 'exons' => 'gene_positions' do |t|
|
|
211
211
|
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
|
212
212
|
exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
213
213
|
|
214
|
-
|
214
|
+
Misc.sensiblewrite(t.name, exons.to_s)
|
215
215
|
end
|
216
216
|
|
217
217
|
file 'transcript_exons' do |t|
|
@@ -372,7 +372,7 @@ file 'gene_go' do |t|
|
|
372
372
|
else
|
373
373
|
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => $namespace)
|
374
374
|
|
375
|
-
|
375
|
+
Misc.sensiblewrite(t.name, goterms.to_s)
|
376
376
|
end
|
377
377
|
end
|
378
378
|
|
@@ -388,7 +388,7 @@ file 'gene_go_bp' => 'gene_go' do |t|
|
|
388
388
|
end
|
389
389
|
|
390
390
|
|
391
|
-
|
391
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
392
392
|
end
|
393
393
|
|
394
394
|
file 'gene_go_cc' => 'gene_go' do |t|
|
@@ -403,7 +403,7 @@ file 'gene_go_cc' => 'gene_go' do |t|
|
|
403
403
|
end
|
404
404
|
|
405
405
|
|
406
|
-
|
406
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
407
407
|
end
|
408
408
|
|
409
409
|
file 'gene_go_mf' => 'gene_go' do |t|
|
@@ -418,7 +418,7 @@ file 'gene_go_mf' => 'gene_go' do |t|
|
|
418
418
|
end
|
419
419
|
|
420
420
|
|
421
|
-
|
421
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
422
422
|
end
|
423
423
|
|
424
424
|
|
@@ -426,19 +426,19 @@ end
|
|
426
426
|
file 'gene_biotype' do |t|
|
427
427
|
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => $namespace)
|
428
428
|
|
429
|
-
|
429
|
+
Misc.sensiblewrite(t.name, biotype.to_s)
|
430
430
|
end
|
431
431
|
|
432
432
|
file 'gene_pfam' do |t|
|
433
433
|
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
|
434
434
|
|
435
|
-
|
435
|
+
Misc.sensiblewrite(t.name, pfam.to_s)
|
436
436
|
end
|
437
437
|
|
438
438
|
file 'chromosomes' do |t|
|
439
439
|
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => $namespace)
|
440
440
|
|
441
|
-
|
441
|
+
Misc.sensiblewrite(t.name, goterms.to_s)
|
442
442
|
end
|
443
443
|
|
444
444
|
file 'blacklist_chromosomes' => 'chromosomes' do |t|
|
@@ -449,12 +449,12 @@ end
|
|
449
449
|
file 'blacklist_genes' => ['blacklist_chromosomes', 'gene_positions'] do |t|
|
450
450
|
Open.read(t.prerequisites.first)
|
451
451
|
genes = CMD.cmd("grep -f '#{t.prerequisites.first}' | cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq
|
452
|
-
|
452
|
+
Misc.sensiblewrite(t.name, genes * "\n")
|
453
453
|
end
|
454
454
|
|
455
455
|
file 'sanctioned_genes' => ['blacklist_genes', 'gene_positions'] do |t|
|
456
456
|
genes = CMD.cmd("cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq - Open.read(t.prerequisites.first).split("\n")
|
457
|
-
|
457
|
+
Misc.sensiblewrite(t.name, genes * "\n")
|
458
458
|
end
|
459
459
|
|
460
460
|
|
@@ -511,7 +511,13 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
|
511
511
|
task = $2
|
512
512
|
Misc.in_dir(archive) do
|
513
513
|
BioMart.set_archive archive
|
514
|
-
|
514
|
+
begin
|
515
|
+
Rake::Task[task].invoke
|
516
|
+
rescue
|
517
|
+
Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
|
518
|
+
Log.exception $!
|
519
|
+
raise $!
|
520
|
+
end
|
515
521
|
BioMart.unset_archive
|
516
522
|
end
|
517
523
|
end
|
@@ -520,7 +526,7 @@ end
|
|
520
526
|
|
521
527
|
|
522
528
|
|
523
|
-
#{{{ Special
|
529
|
+
#{{{ Special files
|
524
530
|
require 'bio'
|
525
531
|
|
526
532
|
file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
@@ -529,7 +535,8 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
529
535
|
chr_transcript_ranges ||= {}
|
530
536
|
transcript_strand = {}
|
531
537
|
|
532
|
-
TSV.
|
538
|
+
TSV.traverse 'transcript_exons' do |transcript,values|
|
539
|
+
#TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
|
533
540
|
transcript_ranges = []
|
534
541
|
|
535
542
|
exons = Misc.zip_fields(values).sort_by{|exon,rank| rank.to_i}.collect{|exon,rank| exon}
|
@@ -549,18 +556,18 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
549
556
|
chr_transcript_ranges[chr][transcript] ||= transcript_ranges
|
550
557
|
end
|
551
558
|
|
552
|
-
transcript_sequence =
|
559
|
+
transcript_sequence = {}
|
553
560
|
chr_transcript_ranges.each do |chr, transcript_ranges|
|
554
561
|
|
555
562
|
begin
|
556
563
|
raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
|
557
|
-
p =
|
558
|
-
|
564
|
+
p = File.expand_path("./chromosome_#{chr}")
|
565
|
+
Organism.root.annotate p
|
559
566
|
p.sub!(%r{.*/organisms/},'share/organisms/')
|
560
|
-
p = Path.setup(p, 'rbbt', Organism)
|
561
567
|
chr_str = p.produce.read
|
562
568
|
rescue Exception
|
563
569
|
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
|
570
|
+
Log.exception $!
|
564
571
|
next
|
565
572
|
end
|
566
573
|
|
@@ -578,6 +585,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
578
585
|
transcript_sequence[transcript] = sequence
|
579
586
|
end
|
580
587
|
end
|
588
|
+
TSV.setup(transcript_sequence, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single, :unnamed => true)
|
581
589
|
|
582
590
|
Misc.sensiblewrite(t.name, transcript_sequence.to_s)
|
583
591
|
end
|
@@ -623,6 +631,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
|
|
623
631
|
start_exon = exon2ensembl[start_exon]
|
624
632
|
eend_exon = exon2ensembl[eend_exon]
|
625
633
|
|
634
|
+
raise "Transcript #{ transcript } missing exons" if transcript_exons[transcript].nil?
|
635
|
+
|
626
636
|
exon_and_rank = Hash[*Misc.zip_fields(transcript_exons[transcript]).flatten]
|
627
637
|
|
628
638
|
start_exon_rank = exon_and_rank[start_exon].to_i
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.4
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|