rbbt-sources 3.0.4 → 3.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8db9d47e44f4e4ab79e5568922226aabf9f8f96c
4
- data.tar.gz: 97e7b44d98556d925bc569aae4c4972dcf99cc95
3
+ metadata.gz: f7d726091f839fd461aaaec6e98dcdf9f5f846d5
4
+ data.tar.gz: d3a32cbf0b7a84d43ab4cb97d91a4c0ac1794fb1
5
5
  SHA512:
6
- metadata.gz: 8fef91c63deabb55ce444ff322c09b9e63e30f505afd785e35f70feec8820978ef8512d327ad3baddf76fea180dce56197ae21556ce7ec1501df33e5171699fc
7
- data.tar.gz: 5e53be06553d90ba8d4b952ff07698cc5da4d9e375744938665f5f3220938ebdb9d92a252b61716bcf4e8219876ed63f1b47a97a3c5b3c12d2659847b00cb622
6
+ metadata.gz: 40c7bced77cc6fccc91ad1be4cfe9110c5397aec28ed12508ea7ecf084b7f58bdebdd716a0d747fb74a2e28cb039bdf01161531e0c7477589c17d80aca2ad87c
7
+ data.tar.gz: d620dcf2f64f580eecfe32720d876f5f7eb420d88e25722cd98c23ccb330f8c640ead25b37ee006c7cd0d93f562b657da587e8d6d1b8bf8ce658ae10996e71c9
@@ -1,3 +1,5 @@
1
1
  may2009
2
2
  jun2011
3
3
  jan2013
4
+ dec2013
5
+ feb2014
@@ -78,17 +78,17 @@ $biomart_exons = [
78
78
  #{{{ Rules
79
79
 
80
80
  file 'entrez_taxids' do |t|
81
- File.open(t.name, 'w') do |f| f.write $taxs * "\n" end
81
+ Misc.sensiblewrite(t.name, $taxs * "\n")
82
82
  end
83
83
 
84
84
  file 'scientific_name' do |t|
85
- File.open(t.name, 'w') do |f| f.write $scientific_name end
85
+ Misc.sensiblewrite(t.name, $scientific_name)
86
86
  end
87
87
 
88
88
  file 'ortholog_key' do |t|
89
89
  raise "Ortholog key not defined. Set up $ortholog_key in the organism specific Rakefile; example $ortholog_key = 'human_ensembl_gene'" unless defined? $ortholog_key and not $ortholog_key.nil?
90
90
 
91
- File.open(t.name, 'w') do |f| f.write $ortholog_key end
91
+ Misc.sensiblewrite(t.name, $ortholog_key)
92
92
  end
93
93
 
94
94
  file 'identifiers' do |t|
@@ -134,7 +134,7 @@ file 'identifiers' do |t|
134
134
  end
135
135
  end
136
136
 
137
- File.open(t.name, 'w') do |f| f.puts identifiers end
137
+ Misc.sensiblewrite(t.name, identifiers.to_s)
138
138
  end
139
139
 
140
140
  file 'lexicon' => 'identifiers' do |t|
@@ -157,7 +157,7 @@ file 'protein_identifiers' do |t|
157
157
  end
158
158
  end
159
159
 
160
- File.open(t.name, 'w') do |f| f.puts identifiers end
160
+ Misc.sensiblewrite(t.name, identifiers.to_s)
161
161
  end
162
162
 
163
163
  file 'transcript_probes' do |t|
@@ -168,32 +168,32 @@ file 'transcript_probes' do |t|
168
168
  end
169
169
  end
170
170
 
171
- File.open(t.name, 'w') do |f| f.puts identifiers end
171
+ Misc.sensiblewrite(t.name, identifiers.to_s)
172
172
  end
173
173
 
174
174
  file 'gene_transcripts' do |t|
175
175
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
176
176
 
177
- File.open(t.name, 'w') do |f| f.puts transcripts end
177
+ Misc.sensiblewrite(t.name, transcripts.to_s)
178
178
  end
179
179
 
180
180
  file 'transcripts' => 'gene_positions' do |t|
181
181
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
182
182
  transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
183
183
 
184
- File.open(t.name, 'w') do |f| f.puts transcripts end
184
+ Misc.sensiblewrite(t.name, transcripts.to_s)
185
185
  end
186
186
 
187
187
  file 'gene_positions' do |t|
188
188
  sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
189
189
 
190
- File.open(t.name, 'w') do |f| f.puts sequences end
190
+ Misc.sensiblewrite(t.name, sequences.to_s)
191
191
  end
192
192
 
193
193
  file 'gene_sequence' do |t|
194
194
  sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
195
195
 
196
- File.open(t.name, 'w') do |f|
196
+ Misc.sensiblewrite(t.name) do |f|
197
197
  f.puts "#: :type=:single"
198
198
  f.puts "#Ensembl Gene ID\tGene Sequence"
199
199
  sequences.each do |seq, genes|
@@ -211,7 +211,7 @@ file 'exons' => 'gene_positions' do |t|
211
211
  exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
212
212
  exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
213
213
 
214
- File.open(t.name, 'w') do |f| f.puts exons end
214
+ Misc.sensiblewrite(t.name, exons.to_s)
215
215
  end
216
216
 
217
217
  file 'transcript_exons' do |t|
@@ -372,7 +372,7 @@ file 'gene_go' do |t|
372
372
  else
373
373
  goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => $namespace)
374
374
 
375
- File.open(t.name, 'w') do |f| f.puts goterms end
375
+ Misc.sensiblewrite(t.name, goterms.to_s)
376
376
  end
377
377
  end
378
378
 
@@ -388,7 +388,7 @@ file 'gene_go_bp' => 'gene_go' do |t|
388
388
  end
389
389
 
390
390
 
391
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
391
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
392
392
  end
393
393
 
394
394
  file 'gene_go_cc' => 'gene_go' do |t|
@@ -403,7 +403,7 @@ file 'gene_go_cc' => 'gene_go' do |t|
403
403
  end
404
404
 
405
405
 
406
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
406
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
407
407
  end
408
408
 
409
409
  file 'gene_go_mf' => 'gene_go' do |t|
@@ -418,7 +418,7 @@ file 'gene_go_mf' => 'gene_go' do |t|
418
418
  end
419
419
 
420
420
 
421
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
421
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
422
422
  end
423
423
 
424
424
 
@@ -426,19 +426,19 @@ end
426
426
  file 'gene_biotype' do |t|
427
427
  biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => $namespace)
428
428
 
429
- File.open(t.name, 'w') do |f| f.puts biotype end
429
+ Misc.sensiblewrite(t.name, biotype.to_s)
430
430
  end
431
431
 
432
432
  file 'gene_pfam' do |t|
433
433
  pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
434
434
 
435
- File.open(t.name, 'w') do |f| f.puts pfam end
435
+ Misc.sensiblewrite(t.name, pfam.to_s)
436
436
  end
437
437
 
438
438
  file 'chromosomes' do |t|
439
439
  goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => $namespace)
440
440
 
441
- File.open(t.name, 'w') do |f| f.puts goterms end
441
+ Misc.sensiblewrite(t.name, goterms.to_s)
442
442
  end
443
443
 
444
444
  file 'blacklist_chromosomes' => 'chromosomes' do |t|
@@ -449,12 +449,12 @@ end
449
449
  file 'blacklist_genes' => ['blacklist_chromosomes', 'gene_positions'] do |t|
450
450
  Open.read(t.prerequisites.first)
451
451
  genes = CMD.cmd("grep -f '#{t.prerequisites.first}' | cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq
452
- File.open(t.name, 'w') do |f| f.puts genes * "\n" end
452
+ Misc.sensiblewrite(t.name, genes * "\n")
453
453
  end
454
454
 
455
455
  file 'sanctioned_genes' => ['blacklist_genes', 'gene_positions'] do |t|
456
456
  genes = CMD.cmd("cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq - Open.read(t.prerequisites.first).split("\n")
457
- File.open(t.name, 'w') do |f| f.puts genes * "\n" end
457
+ Misc.sensiblewrite(t.name, genes * "\n")
458
458
  end
459
459
 
460
460
 
@@ -511,7 +511,13 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
511
511
  task = $2
512
512
  Misc.in_dir(archive) do
513
513
  BioMart.set_archive archive
514
- Rake::Task[task].invoke
514
+ begin
515
+ Rake::Task[task].invoke
516
+ rescue
517
+ Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
518
+ Log.exception $!
519
+ raise $!
520
+ end
515
521
  BioMart.unset_archive
516
522
  end
517
523
  end
@@ -520,7 +526,7 @@ end
520
526
 
521
527
 
522
528
 
523
- #{{{ Special riles
529
+ #{{{ Special files
524
530
  require 'bio'
525
531
 
526
532
  file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
@@ -529,7 +535,8 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
529
535
  chr_transcript_ranges ||= {}
530
536
  transcript_strand = {}
531
537
 
532
- TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
538
+ TSV.traverse 'transcript_exons' do |transcript,values|
539
+ #TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
533
540
  transcript_ranges = []
534
541
 
535
542
  exons = Misc.zip_fields(values).sort_by{|exon,rank| rank.to_i}.collect{|exon,rank| exon}
@@ -549,18 +556,18 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
549
556
  chr_transcript_ranges[chr][transcript] ||= transcript_ranges
550
557
  end
551
558
 
552
- transcript_sequence = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single)
559
+ transcript_sequence = {}
553
560
  chr_transcript_ranges.each do |chr, transcript_ranges|
554
561
 
555
562
  begin
556
563
  raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
557
- p = Organism.root
558
- p.replace File.expand_path("./chromosome_#{chr}")
564
+ p = File.expand_path("./chromosome_#{chr}")
565
+ Organism.root.annotate p
559
566
  p.sub!(%r{.*/organisms/},'share/organisms/')
560
- p = Path.setup(p, 'rbbt', Organism)
561
567
  chr_str = p.produce.read
562
568
  rescue Exception
563
569
  Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
570
+ Log.exception $!
564
571
  next
565
572
  end
566
573
 
@@ -578,6 +585,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
578
585
  transcript_sequence[transcript] = sequence
579
586
  end
580
587
  end
588
+ TSV.setup(transcript_sequence, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single, :unnamed => true)
581
589
 
582
590
  Misc.sensiblewrite(t.name, transcript_sequence.to_s)
583
591
  end
@@ -623,6 +631,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
623
631
  start_exon = exon2ensembl[start_exon]
624
632
  eend_exon = exon2ensembl[eend_exon]
625
633
 
634
+ raise "Transcript #{ transcript } missing exons" if transcript_exons[transcript].nil?
635
+
626
636
  exon_and_rank = Hash[*Misc.zip_fields(transcript_exons[transcript]).flatten]
627
637
 
628
638
  start_exon_rank = exon_and_rank[start_exon].to_i
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-27 00:00:00.000000000 Z
11
+ date: 2014-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util