rbbt-sources 2.1.1 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/rbbt/sources/COSMIC.rb +1 -1
- data/share/install/Organism/organism_helpers.rb +4 -141
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTA2OWRlODEyNjgwZTEyNDQ4MTZiN2NkZWM2NTgyY2QwOTY2NjM2NQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzRmMzdjOGI0YjYwNjVlNDhjZWE5NjcxMTBjZmU4YzJkMGZmMjBhYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NDE1M2U5ZDU5MWVhNGVkYzQ1NzM1MzIwNzQ0OGUzYzFjZjMyMWE2OTUxZTFk
|
10
|
+
MmUyOWVhZDhkNjFjNWJmZWJmMDI1ZmU3NjgyNzM3NTg2YmQ1Y2Q5Mjk5NmNj
|
11
|
+
NWNiMWQ1OTQ2MTNiMWM1OTVhODA1MjExNTQwODFlZmNiZDYyNDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NWYzNjg5ZjExNzM2MmJkYmVmMTQ5ZWY4OGJiOWMzZGUxNTJlMDVkN2NkOTFm
|
14
|
+
N2ZmMTAzMTJkMjdkMjc4ZjFhZmQ2YTM4NzgyMGFlM2U0NGQzYjlkMzcwNjU1
|
15
|
+
OTBkZWY2YThjNDc1YTlhZThkOGNmNWU5MjhmMmMwNjg4NmQxYWI=
|
data/lib/rbbt/sources/COSMIC.rb
CHANGED
@@ -6,7 +6,7 @@ module COSMIC
|
|
6
6
|
self.subdir = "share/databases/COSMIC"
|
7
7
|
|
8
8
|
COSMIC.claim COSMIC.mutations, :proc do
|
9
|
-
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/
|
9
|
+
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v66_250713.tsv.gz"
|
10
10
|
|
11
11
|
stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
|
12
12
|
tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
|
data/share/install/Organism/organism_helpers.rb
CHANGED
@@ -462,7 +462,7 @@ rule /^chromosome_.*/ do |t|
|
|
462
462
|
chr = t.name.match(/chromosome_(.*)/)[1]
|
463
463
|
|
464
464
|
# HACK: Skip LRG chromosomes
|
465
|
-
raise "LRG chromosomes not supported: #{ chr }" if chr =~ /^LRG_/
|
465
|
+
raise "LRG and GL chromosomes not supported: #{ chr }" if chr =~ /^(?:LRG_|GL0)/
|
466
466
|
|
467
467
|
archive = File.basename(FileUtils.pwd) =~ /^([a-z]{3}[0-9]{4})$/i ? $1 : nil
|
468
468
|
|
@@ -553,13 +553,14 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
553
553
|
chr_transcript_ranges.each do |chr, transcript_ranges|
|
554
554
|
|
555
555
|
begin
|
556
|
+
raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
|
556
557
|
p = Organism.root
|
557
558
|
p.replace File.expand_path("./chromosome_#{chr}")
|
558
559
|
p.sub!(%r{.*/organisms/},'share/organisms/')
|
559
560
|
p = Path.setup(p, 'rbbt', Organism)
|
560
561
|
chr_str = p.produce.read
|
561
562
|
rescue Exception
|
562
|
-
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)
|
563
|
+
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
|
563
564
|
next
|
564
565
|
end
|
565
566
|
|
@@ -649,6 +650,7 @@ end
|
|
649
650
|
|
650
651
|
file 'transcript_3utr' => ["transcript_5utr"] do |t|
|
651
652
|
end
|
653
|
+
|
652
654
|
file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr", "transcript_phase", "transcript_sequence"] do |t|
|
653
655
|
transcript_5utr = TSV.open(File.expand_path('./transcript_5utr'), :unnamed => true)
|
654
656
|
transcript_3utr = TSV.open(File.expand_path('./transcript_3utr'), :unnamed => true)
|
@@ -676,142 +678,3 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
676
678
|
Open.write(t.name, protein_sequence.to_s)
|
677
679
|
end
|
678
680
|
end
|
679
|
-
|
680
|
-
#{{{ OLD
|
681
|
-
|
682
|
-
#file 'transcript_phase' do |t|
|
683
|
-
# tsv = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Phase"], :type => :single, :cast => :to_i)
|
684
|
-
#
|
685
|
-
# transcript_cds_start = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, [['CDNA Start','cds_start']], [], nil, :type => :flat, :namespace => $namespace)
|
686
|
-
# transcript_cds_start.through do |transcript, values|
|
687
|
-
# phase = values.compact.reject{|p| p.empty?}.select{|p| p == "1" or p == "2"}.first
|
688
|
-
# tsv[transcript] = phase.to_i unless phase.nil?
|
689
|
-
# end
|
690
|
-
#
|
691
|
-
# File.open(t.name, 'w') do |f| f.puts tsv end
|
692
|
-
#end
|
693
|
-
#
|
694
|
-
#file 'transcript_3utr' do |t|
|
695
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
696
|
-
#
|
697
|
-
# File.open(t.name, 'w') do |f|
|
698
|
-
# f.puts "#: :type=:single#cast=to_i"
|
699
|
-
# f.puts "#Ensembl Transcript ID\t3' UTR Length"
|
700
|
-
# utrs.each do |seq,trans|
|
701
|
-
# trans.each do |tran|
|
702
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
703
|
-
# end
|
704
|
-
# end
|
705
|
-
# end
|
706
|
-
#end
|
707
|
-
#
|
708
|
-
#file 'transcript_5utr' do |t|
|
709
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
710
|
-
#
|
711
|
-
# File.open(t.name, 'w') do |f|
|
712
|
-
# f.puts "#: :type=:single#cast=to_i"
|
713
|
-
# f.puts "#Ensembl Transcript ID\t5' UTR Length"
|
714
|
-
# utrs.each do |seq,trans|
|
715
|
-
# trans.each do |tran|
|
716
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
717
|
-
# end
|
718
|
-
# end
|
719
|
-
# end
|
720
|
-
#end
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
#file 'transcript_sequence' do |t|
|
725
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
726
|
-
#
|
727
|
-
# File.open(t.name, 'w') do |f|
|
728
|
-
# f.puts "#: :type=:single"
|
729
|
-
# f.puts "#Ensembl Transcript ID\tTranscript Sequence"
|
730
|
-
# sequences.each do |seq, genes|
|
731
|
-
# genes.each do |gene|
|
732
|
-
# f.write gene
|
733
|
-
# f.write "\t"
|
734
|
-
# f.write seq
|
735
|
-
# f.write "\n"
|
736
|
-
# end
|
737
|
-
# end
|
738
|
-
# end
|
739
|
-
#end
|
740
|
-
|
741
|
-
#file 'transcript_phase' do |t|
|
742
|
-
# tsv = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Phase"], :type => :single, :cast => :to_i)
|
743
|
-
#
|
744
|
-
# transcript_cds_start = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, [['CDNA Start','cds_start']], [], nil, :type => :flat, :namespace => $namespace)
|
745
|
-
# transcript_cds_start.through do |transcript, values|
|
746
|
-
# phase = values.compact.reject{|p| p.empty?}.select{|p| p == "1" or p == "2"}.first
|
747
|
-
# tsv[transcript] = phase.to_i unless phase.nil?
|
748
|
-
# end
|
749
|
-
#
|
750
|
-
# File.open(t.name, 'w') do |f| f.puts tsv end
|
751
|
-
#end
|
752
|
-
#
|
753
|
-
#file 'transcript_3utr' do |t|
|
754
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
755
|
-
#
|
756
|
-
# File.open(t.name, 'w') do |f|
|
757
|
-
# f.puts "#: :type=:single#cast=to_i"
|
758
|
-
# f.puts "#Ensembl Transcript ID\t3' UTR Length"
|
759
|
-
# utrs.each do |seq,trans|
|
760
|
-
# trans.each do |tran|
|
761
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
762
|
-
# end
|
763
|
-
# end
|
764
|
-
# end
|
765
|
-
#end
|
766
|
-
#
|
767
|
-
#file 'transcript_5utr' do |t|
|
768
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
769
|
-
#
|
770
|
-
# File.open(t.name, 'w') do |f|
|
771
|
-
# f.puts "#: :type=:single#cast=to_i"
|
772
|
-
# f.puts "#Ensembl Transcript ID\t5' UTR Length"
|
773
|
-
# utrs.each do |seq,trans|
|
774
|
-
# trans.each do |tran|
|
775
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
776
|
-
# end
|
777
|
-
# end
|
778
|
-
# end
|
779
|
-
#end
|
780
|
-
|
781
|
-
#file 'transcript_sequence' do |t|
|
782
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
783
|
-
#
|
784
|
-
# File.open(t.name, 'w') do |f|
|
785
|
-
# f.puts "#: :type=:single"
|
786
|
-
# f.puts "#Ensembl Transcript ID\tTranscript Sequence"
|
787
|
-
# sequences.each do |seq, genes|
|
788
|
-
# genes.each do |gene|
|
789
|
-
# f.write gene
|
790
|
-
# f.write "\t"
|
791
|
-
# f.write seq
|
792
|
-
# f.write "\n"
|
793
|
-
# end
|
794
|
-
# end
|
795
|
-
# end
|
796
|
-
#end
|
797
|
-
#file 'protein_sequence' => 'chromosomes' do |t|
|
798
|
-
# #chromosomes = TSV.open(t.prerequisites.first).keys
|
799
|
-
# #sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace, :chunk_filter => ['chromosome_name', chromosomes])
|
800
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
801
|
-
#
|
802
|
-
# File.open(t.name, 'w') do |f|
|
803
|
-
# f.puts "#: :type=:single"
|
804
|
-
# f.puts "#Ensembl Protein ID\tProtein Sequence"
|
805
|
-
# sequences.each do |seq, genes|
|
806
|
-
# genes.each do |gene|
|
807
|
-
# f.write gene
|
808
|
-
# f.write "\t"
|
809
|
-
# f.write seq
|
810
|
-
# f.write "\n"
|
811
|
-
# end
|
812
|
-
# end
|
813
|
-
# end
|
814
|
-
#end
|
815
|
-
|
816
|
-
|
817
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.1
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|