rbbt-sources 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8db9d47e44f4e4ab79e5568922226aabf9f8f96c
4
- data.tar.gz: 97e7b44d98556d925bc569aae4c4972dcf99cc95
3
+ metadata.gz: f7d726091f839fd461aaaec6e98dcdf9f5f846d5
4
+ data.tar.gz: d3a32cbf0b7a84d43ab4cb97d91a4c0ac1794fb1
5
5
  SHA512:
6
- metadata.gz: 8fef91c63deabb55ce444ff322c09b9e63e30f505afd785e35f70feec8820978ef8512d327ad3baddf76fea180dce56197ae21556ce7ec1501df33e5171699fc
7
- data.tar.gz: 5e53be06553d90ba8d4b952ff07698cc5da4d9e375744938665f5f3220938ebdb9d92a252b61716bcf4e8219876ed63f1b47a97a3c5b3c12d2659847b00cb622
6
+ metadata.gz: 40c7bced77cc6fccc91ad1be4cfe9110c5397aec28ed12508ea7ecf084b7f58bdebdd716a0d747fb74a2e28cb039bdf01161531e0c7477589c17d80aca2ad87c
7
+ data.tar.gz: d620dcf2f64f580eecfe32720d876f5f7eb420d88e25722cd98c23ccb330f8c640ead25b37ee006c7cd0d93f562b657da587e8d6d1b8bf8ce658ae10996e71c9
@@ -1,3 +1,5 @@
1
1
  may2009
2
2
  jun2011
3
3
  jan2013
4
+ dec2013
5
+ feb2014
@@ -78,17 +78,17 @@ $biomart_exons = [
78
78
  #{{{ Rules
79
79
 
80
80
  file 'entrez_taxids' do |t|
81
- File.open(t.name, 'w') do |f| f.write $taxs * "\n" end
81
+ Misc.sensiblewrite(t.name, $taxs * "\n")
82
82
  end
83
83
 
84
84
  file 'scientific_name' do |t|
85
- File.open(t.name, 'w') do |f| f.write $scientific_name end
85
+ Misc.sensiblewrite(t.name, $scientific_name)
86
86
  end
87
87
 
88
88
  file 'ortholog_key' do |t|
89
89
  raise "Ortholog key not defined. Set up $ortholog_key in the organism specific Rakefile; example $ortholog_key = 'human_ensembl_gene'" unless defined? $ortholog_key and not $ortholog_key.nil?
90
90
 
91
- File.open(t.name, 'w') do |f| f.write $ortholog_key end
91
+ Misc.sensiblewrite(t.name, $ortholog_key)
92
92
  end
93
93
 
94
94
  file 'identifiers' do |t|
@@ -134,7 +134,7 @@ file 'identifiers' do |t|
134
134
  end
135
135
  end
136
136
 
137
- File.open(t.name, 'w') do |f| f.puts identifiers end
137
+ Misc.sensiblewrite(t.name, identifiers.to_s)
138
138
  end
139
139
 
140
140
  file 'lexicon' => 'identifiers' do |t|
@@ -157,7 +157,7 @@ file 'protein_identifiers' do |t|
157
157
  end
158
158
  end
159
159
 
160
- File.open(t.name, 'w') do |f| f.puts identifiers end
160
+ Misc.sensiblewrite(t.name, identifiers.to_s)
161
161
  end
162
162
 
163
163
  file 'transcript_probes' do |t|
@@ -168,32 +168,32 @@ file 'transcript_probes' do |t|
168
168
  end
169
169
  end
170
170
 
171
- File.open(t.name, 'w') do |f| f.puts identifiers end
171
+ Misc.sensiblewrite(t.name, identifiers.to_s)
172
172
  end
173
173
 
174
174
  file 'gene_transcripts' do |t|
175
175
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_transcript, [], nil, :type => :flat, :namespace => $namespace)
176
176
 
177
- File.open(t.name, 'w') do |f| f.puts transcripts end
177
+ Misc.sensiblewrite(t.name, transcripts.to_s)
178
178
  end
179
179
 
180
180
  file 'transcripts' => 'gene_positions' do |t|
181
181
  transcripts = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript, [], nil, :type => :list, :namespace => $namespace)
182
182
  transcripts.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
183
183
 
184
- File.open(t.name, 'w') do |f| f.puts transcripts end
184
+ Misc.sensiblewrite(t.name, transcripts.to_s)
185
185
  end
186
186
 
187
187
  file 'gene_positions' do |t|
188
188
  sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
189
189
 
190
- File.open(t.name, 'w') do |f| f.puts sequences end
190
+ Misc.sensiblewrite(t.name, sequences.to_s)
191
191
  end
192
192
 
193
193
  file 'gene_sequence' do |t|
194
194
  sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_sequence, [], nil, :type => :flat, :namespace => $namespace)
195
195
 
196
- File.open(t.name, 'w') do |f|
196
+ Misc.sensiblewrite(t.name) do |f|
197
197
  f.puts "#: :type=:single"
198
198
  f.puts "#Ensembl Gene ID\tGene Sequence"
199
199
  sequences.each do |seq, genes|
@@ -211,7 +211,7 @@ file 'exons' => 'gene_positions' do |t|
211
211
  exons = BioMart.tsv($biomart_db, $biomart_ensembl_exon, $biomart_exons, [], nil, :merge => false, :type => :list, :namespace => $namespace)
212
212
  exons.attach TSV.open('gene_positions'), :fields => ["Chromosome Name"]
213
213
 
214
- File.open(t.name, 'w') do |f| f.puts exons end
214
+ Misc.sensiblewrite(t.name, exons.to_s)
215
215
  end
216
216
 
217
217
  file 'transcript_exons' do |t|
@@ -372,7 +372,7 @@ file 'gene_go' do |t|
372
372
  else
373
373
  goterms = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_go, [], nil, :type => :double, :namespace => $namespace)
374
374
 
375
- File.open(t.name, 'w') do |f| f.puts goterms end
375
+ Misc.sensiblewrite(t.name, goterms.to_s)
376
376
  end
377
377
  end
378
378
 
@@ -388,7 +388,7 @@ file 'gene_go_bp' => 'gene_go' do |t|
388
388
  end
389
389
 
390
390
 
391
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
391
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
392
392
  end
393
393
 
394
394
  file 'gene_go_cc' => 'gene_go' do |t|
@@ -403,7 +403,7 @@ file 'gene_go_cc' => 'gene_go' do |t|
403
403
  end
404
404
 
405
405
 
406
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
406
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
407
407
  end
408
408
 
409
409
  file 'gene_go_mf' => 'gene_go' do |t|
@@ -418,7 +418,7 @@ file 'gene_go_mf' => 'gene_go' do |t|
418
418
  end
419
419
 
420
420
 
421
- File.open(t.name, 'w') do |f| f.puts gene_go.slice "GO ID" end
421
+ Misc.sensiblewrite(t.name, gene_go.slice("GO ID").to_s)
422
422
  end
423
423
 
424
424
 
@@ -426,19 +426,19 @@ end
426
426
  file 'gene_biotype' do |t|
427
427
  biotype = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_biotype, [], nil, :type => :single, :namespace => $namespace)
428
428
 
429
- File.open(t.name, 'w') do |f| f.puts biotype end
429
+ Misc.sensiblewrite(t.name, biotype.to_s)
430
430
  end
431
431
 
432
432
  file 'gene_pfam' do |t|
433
433
  pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => $namespace)
434
434
 
435
- File.open(t.name, 'w') do |f| f.puts pfam end
435
+ Misc.sensiblewrite(t.name, pfam.to_s)
436
436
  end
437
437
 
438
438
  file 'chromosomes' do |t|
439
439
  goterms = BioMart.tsv($biomart_db, ['Chromosome Name', "chromosome_name"] , [] , [], nil, :type => :double, :namespace => $namespace)
440
440
 
441
- File.open(t.name, 'w') do |f| f.puts goterms end
441
+ Misc.sensiblewrite(t.name, goterms.to_s)
442
442
  end
443
443
 
444
444
  file 'blacklist_chromosomes' => 'chromosomes' do |t|
@@ -449,12 +449,12 @@ end
449
449
  file 'blacklist_genes' => ['blacklist_chromosomes', 'gene_positions'] do |t|
450
450
  Open.read(t.prerequisites.first)
451
451
  genes = CMD.cmd("grep -f '#{t.prerequisites.first}' | cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq
452
- File.open(t.name, 'w') do |f| f.puts genes * "\n" end
452
+ Misc.sensiblewrite(t.name, genes * "\n")
453
453
  end
454
454
 
455
455
  file 'sanctioned_genes' => ['blacklist_genes', 'gene_positions'] do |t|
456
456
  genes = CMD.cmd("cut -f 1", :in => Open.open(t.prerequisites.last)).read.split("\n").uniq - Open.read(t.prerequisites.first).split("\n")
457
- File.open(t.name, 'w') do |f| f.puts genes * "\n" end
457
+ Misc.sensiblewrite(t.name, genes * "\n")
458
458
  end
459
459
 
460
460
 
@@ -511,7 +511,13 @@ rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
511
511
  task = $2
512
512
  Misc.in_dir(archive) do
513
513
  BioMart.set_archive archive
514
- Rake::Task[task].invoke
514
+ begin
515
+ Rake::Task[task].invoke
516
+ rescue
517
+ Log.error "Error producing archived (#{archive}) version of #{task}: #{t.name}"
518
+ Log.exception $!
519
+ raise $!
520
+ end
515
521
  BioMart.unset_archive
516
522
  end
517
523
  end
@@ -520,7 +526,7 @@ end
520
526
 
521
527
 
522
528
 
523
- #{{{ Special riles
529
+ #{{{ Special files
524
530
  require 'bio'
525
531
 
526
532
  file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
@@ -529,7 +535,8 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
529
535
  chr_transcript_ranges ||= {}
530
536
  transcript_strand = {}
531
537
 
532
- TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
538
+ TSV.traverse 'transcript_exons' do |transcript,values|
539
+ #TSV.open('transcript_exons', :unnamed => true).through do |transcript, values|
533
540
  transcript_ranges = []
534
541
 
535
542
  exons = Misc.zip_fields(values).sort_by{|exon,rank| rank.to_i}.collect{|exon,rank| exon}
@@ -549,18 +556,18 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
549
556
  chr_transcript_ranges[chr][transcript] ||= transcript_ranges
550
557
  end
551
558
 
552
- transcript_sequence = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single)
559
+ transcript_sequence = {}
553
560
  chr_transcript_ranges.each do |chr, transcript_ranges|
554
561
 
555
562
  begin
556
563
  raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
557
- p = Organism.root
558
- p.replace File.expand_path("./chromosome_#{chr}")
564
+ p = File.expand_path("./chromosome_#{chr}")
565
+ Organism.root.annotate p
559
566
  p.sub!(%r{.*/organisms/},'share/organisms/')
560
- p = Path.setup(p, 'rbbt', Organism)
561
567
  chr_str = p.produce.read
562
568
  rescue Exception
563
569
  Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
570
+ Log.exception $!
564
571
  next
565
572
  end
566
573
 
@@ -578,6 +585,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
578
585
  transcript_sequence[transcript] = sequence
579
586
  end
580
587
  end
588
+ TSV.setup(transcript_sequence, :key_field => "Ensembl Transcript ID", :fields => ["Sequence"], :type => :single, :unnamed => true)
581
589
 
582
590
  Misc.sensiblewrite(t.name, transcript_sequence.to_s)
583
591
  end
@@ -623,6 +631,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
623
631
  start_exon = exon2ensembl[start_exon]
624
632
  eend_exon = exon2ensembl[eend_exon]
625
633
 
634
+ raise "Transcript #{ transcript } missing exons" if transcript_exons[transcript].nil?
635
+
626
636
  exon_and_rank = Hash[*Misc.zip_fields(transcript_exons[transcript]).flatten]
627
637
 
628
638
  start_exon_rank = exon_and_rank[start_exon].to_i
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-27 00:00:00.000000000 Z
11
+ date: 2014-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util