rbbt-sources 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +2 -0
- data/share/install/Organism/organism_helpers.rb +37 -27
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7d726091f839fd461aaaec6e98dcdf9f5f846d5
|
4
|
+
data.tar.gz: d3a32cbf0b7a84d43ab4cb97d91a4c0ac1794fb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40c7bced77cc6fccc91ad1be4cfe9110c5397aec28ed12508ea7ecf084b7f58bdebdd716a0d747fb74a2e28cb039bdf01161531e0c7477589c17d80aca2ad87c
|
7
|
+
data.tar.gz: d620dcf2f64f580eecfe32720d876f5f7eb420d88e25722cd98c23ccb330f8c640ead25b37ee006c7cd0d93f562b657da587e8d6d1b8bf8ce658ae10996e71c9
|
@@ -78,17 +78,17 @@ $biomart_exons = [
|
|
78
78
|
#{{{ Rules
|
79
79
|
|
80
80
|
file 'entrez_taxids' do |t|
|
81
|
-
|
81
|
+
Misc.sensiblewrite(t.name, $taxs * "\n")
|
82
82
|
end
|
83
83
|
|
84
84
|
file 'scientific_name' do |t|
|
85
|
-
|
85
|
+
Misc.sensiblewrite(t.name, $scientific_name)
|
86
86
|
end
|
87
87
|
|
88
88
|
file 'ortholog_key' do |t|
|
89
89
|
raise "Ortholog key not defined. Set up $ortholog_key in the organism specific Rakefile; example $ortholog_key = 'human_ensembl_gene'" unless defined? $ortholog_key and not $ortholog_key.nil?
|
90
90
|
|
91
|
-
|
91
|
+
Misc.sensiblewrite(t.name, $ortholog_key)
|
92
92
|
end
|
93
93
|
|
94
94
|
file 'identifiers' do |t|
|
@@ -134,7 +134,7 @@ file 'identifiers' do |t|
|
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
|
-
|
137
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
138
138
|
end
|
139
139
|
|
140
140
|
file 'lexicon' => 'identifiers' do |t|
|
@@ -157,7 +157,7 @@ file 'protein_identifiers' do |t|
|
|
157
157
|
end
|
158
158
|
end
|
159
159
|
|
160
|
-
|
160
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
161
161
|
end
|
162
162
|
|
163
163
|
file 'transcript_probes' do |t|
|
@@ -168,32 +168,32 @@ file 'transcript_probes' do |t|
|
|
168
168
|
end
|
169
169
|
end
|
170
170
|
|
171
|
-
|
171
|
+
Misc.sensiblewrite(t.name, identifiers.to_s)
|
172
172
|
end
|
173
173
|
|
174
174
|
file 'gene_transcripts' do |t|
|
175
175
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
|
176
176
|
|
177
|
-
|
177
|
+
Misc.sensiblewrite(t.name, transcripts.to_s)
|
178
178
|
end
|
179
179
|
|
180
180
|
file 'transcripts' => 'gene_positions' do |t|
|
181
181
|
transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
|
182
182
|
transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
183
183
|
|
184
|
-
|
184
|
+
Misc.sensiblewrite(t.name, transcripts.to_s)
|
185
185
|
end
|
186
186
|
|
187
187
|
file 'gene_positions' do |t|
|
188
188
|
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
|
189
189
|
|
190
|
-
|
190
|
+
Misc.sensiblewrite(t.name, sequences.to_s)
|
191
191
|
end
|
192
192
|
|
193
193
|
file 'gene_sequence' do |t|
|
194
194
|
sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
195
195
|
|
196
|
-
|
196
|
+
Misc.sensiblewrite(t.name) do |f|
|
197
197
|
f.puts "#: :type=:single"
|
198
198
|
f.puts "#Ensembl Gene ID\tGene Sequence"
|
199
199
|
sequences.each do |seq, genes|
|
@@ -211,7 +211,7 @@ file 'exons' => 'gene_positions' do |t|
|
|
211
211
|
exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
|
212
212
|
exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
|
213
213
|
|
214
|
-
|
214
|
+
Misc.sensiblewrite(t.name, exons.to_s)
|
215
215
|
end
|
216
216
|
|
217
217
|
file 'transcript_exons' do |t|
|
@@ -372,7 +372,7 @@ file 'gene_go' do |t|
|
|
372
372
|
else
|
373
373
|
goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => $namespace)
|
374
374
|
|
375
|
-
|
375
|
+
Misc.sensiblewrite(t.name, goterms.to_s)
|
376
376
|
end
|
377
377
|
end
|
378
378
|
|
@@ -388,7 +388,7 @@ file 'gene_go_bp' => 'gene_go' do |t|
|
|
388
388
|
end
|
389
389
|
|
390
390
|
|
391
|
-
|
391
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
392
392
|
end
|
393
393
|
|
394
394
|
file 'gene_go_cc' => 'gene_go' do |t|
|
@@ -403,7 +403,7 @@ file 'gene_go_cc' => 'gene_go' do |t|
|
|
403
403
|
end
|
404
404
|
|
405
405
|
|
406
|
-
|
406
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
407
407
|
end
|
408
408
|
|
409
409
|
file 'gene_go_mf' => 'gene_go' do |t|
|
@@ -418,7 +418,7 @@ file 'gene_go_mf' => 'gene_go' do |t|
|
|
418
418
|
end
|
419
419
|
|
420
420
|
|
421
|
-
|
421
|
+
Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
|
422
422
|
end
|
423
423
|
|
424
424
|
|
@@ -426,19 +426,19 @@ end
|
|
426
426
|
file 'gene_biotype' do |t|
|
427
427
|
biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => $namespace)
|
428
428
|
|
429
|
-
|
429
|
+
Misc.sensiblewrite(t.name, biotype.to_s)
|
430
430
|
end
|
431
431
|
|
432
432
|
file 'gene_pfam' do |t|
|
433
433
|
pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
|
434
434
|
|
435
|
-
|
435
|
+
Misc.sensiblewrite(t.name, pfam.to_s)
|
436
436
|
end
|
437
437
|
|
438
438
|
file 'chromosomes' do |t|
|
439
439
|
goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => $namespace)
|
440
440
|
|
441
|
-
|
441
|
+
Misc.sensiblewrite(t.name, goterms.to_s)
|
442
442
|
end
|
443
443
|
|
444
444
|
file 'blacklist_chromosomes' => 'chromosomes' do |t|
|
@@ -449,12 +449,12 @@ end
|
|
449
449
|
file 'blacklist_genes' => ['blacklist_chromosomes', 'gene_positions'] do |t|
|
450
450
|
Open.read(t.prerequisites.first)
|
451
451
|
genes = CMD.cmd("grep -f '#{t.prerequisites.first}' | cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq
|
452
|
-
|
452
|
+
Misc.sensiblewrite(t.name, genes * "\n")
|
453
453
|
end
|
454
454
|
|
455
455
|
file 'sanctioned_genes' => ['blacklist_genes', 'gene_positions'] do |t|
|
456
456
|
genes = CMD.cmd("cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq - Open.read(t.prerequisites.first).split("\n")
|
457
|
-
|
457
|
+
Misc.sensiblewrite(t.name, genes * "\n")
|
458
458
|
end
|
459
459
|
|
460
460
|
|
@@ -511,7 +511,13 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
|
511
511
|
task = $2
|
512
512
|
Misc.in_dir(archive) do
|
513
513
|
BioMart.set_archive archive
|
514
|
-
|
514
|
+
begin
|
515
|
+
Rake::Task[task].invoke
|
516
|
+
rescue
|
517
|
+
Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
|
518
|
+
Log.exception $!
|
519
|
+
raise $!
|
520
|
+
end
|
515
521
|
BioMart.unset_archive
|
516
522
|
end
|
517
523
|
end
|
@@ -520,7 +526,7 @@ end
|
|
520
526
|
|
521
527
|
|
522
528
|
|
523
|
-
#{{{ Special
|
529
|
+
#{{{ Special files
|
524
530
|
require 'bio'
|
525
531
|
|
526
532
|
file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
@@ -529,7 +535,8 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
529
535
|
chr_transcript_ranges ||= {}
|
530
536
|
transcript_strand = {}
|
531
537
|
|
532
|
-
TSV.
|
538
|
+
TSV.traverse 'transcript_exons' do |transcript,values|
|
539
|
+
#TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
|
533
540
|
transcript_ranges = []
|
534
541
|
|
535
542
|
exons = Misc.zip_fields(values).sort_by{|exon,rank| rank.to_i}.collect{|exon,rank| exon}
|
@@ -549,18 +556,18 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
549
556
|
chr_transcript_ranges[chr][transcript] ||= transcript_ranges
|
550
557
|
end
|
551
558
|
|
552
|
-
transcript_sequence =
|
559
|
+
transcript_sequence = {}
|
553
560
|
chr_transcript_ranges.each do |chr, transcript_ranges|
|
554
561
|
|
555
562
|
begin
|
556
563
|
raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
|
557
|
-
p =
|
558
|
-
|
564
|
+
p = File.expand_path("./chromosome_#{chr}")
|
565
|
+
Organism.root.annotate p
|
559
566
|
p.sub!(%r{.*/organisms/},'share/organisms/')
|
560
|
-
p = Path.setup(p, 'rbbt', Organism)
|
561
567
|
chr_str = p.produce.read
|
562
568
|
rescue Exception
|
563
569
|
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
|
570
|
+
Log.exception $!
|
564
571
|
next
|
565
572
|
end
|
566
573
|
|
@@ -578,6 +585,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
578
585
|
transcript_sequence[transcript] = sequence
|
579
586
|
end
|
580
587
|
end
|
588
|
+
TSV.setup(transcript_sequence, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single, :unnamed => true)
|
581
589
|
|
582
590
|
Misc.sensiblewrite(t.name, transcript_sequence.to_s)
|
583
591
|
end
|
@@ -623,6 +631,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
|
|
623
631
|
start_exon = exon2ensembl[start_exon]
|
624
632
|
eend_exon = exon2ensembl[eend_exon]
|
625
633
|
|
634
|
+
raise "Transcript #{ transcript } missing exons" if transcript_exons[transcript].nil?
|
635
|
+
|
626
636
|
exon_and_rank = Hash[*Misc.zip_fields(transcript_exons[transcript]).flatten]
|
627
637
|
|
628
638
|
start_exon_rank = exon_and_rank[start_exon].to_i
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.4
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|