rbbt-sources 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/rbbt/sources/COSMIC.rb +1 -1
- data/share/install/Organism/organism_helpers.rb +4 -141
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTA2OWRlODEyNjgwZTEyNDQ4MTZiN2NkZWM2NTgyY2QwOTY2NjM2NQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzRmMzdjOGI0YjYwNjVlNDhjZWE5NjcxMTBjZmU4YzJkMGZmMjBhYw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NDE1M2U5ZDU5MWVhNGVkYzQ1NzM1MzIwNzQ0OGUzYzFjZjMyMWE2OTUxZTFk
|
10
|
+
MmUyOWVhZDhkNjFjNWJmZWJmMDI1ZmU3NjgyNzM3NTg2YmQ1Y2Q5Mjk5NmNj
|
11
|
+
NWNiMWQ1OTQ2MTNiMWM1OTVhODA1MjExNTQwODFlZmNiZDYyNDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NWYzNjg5ZjExNzM2MmJkYmVmMTQ5ZWY4OGJiOWMzZGUxNTJlMDVkN2NkOTFm
|
14
|
+
N2ZmMTAzMTJkMjdkMjc4ZjFhZmQ2YTM4NzgyMGFlM2U0NGQzYjlkMzcwNjU1
|
15
|
+
OTBkZWY2YThjNDc1YTlhZThkOGNmNWU5MjhmMmMwNjg4NmQxYWI=
|
data/lib/rbbt/sources/COSMIC.rb
CHANGED
@@ -6,7 +6,7 @@ module COSMIC
|
|
6
6
|
self.subdir = "share/databases/COSMIC"
|
7
7
|
|
8
8
|
COSMIC.claim COSMIC.mutations, :proc do
|
9
|
-
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/
|
9
|
+
url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v66_250713.tsv.gz"
|
10
10
|
|
11
11
|
stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
|
12
12
|
tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
|
@@ -462,7 +462,7 @@ rule /^chromosome_.*/ do |t|
|
|
462
462
|
chr = t.name.match(/chromosome_(.*)/)[1]
|
463
463
|
|
464
464
|
# HACK: Skip LRG chromosomes
|
465
|
-
raise "LRG chromosomes not supported: #{ chr }" if chr =~ /^LRG_/
|
465
|
+
raise "LRG and GL chromosomes not supported: #{ chr }" if chr =~ /^(?:LRG_|GL0)/
|
466
466
|
|
467
467
|
archive = File.basename(FileUtils.pwd) =~ /^([a-z]{3}[0-9]{4})$/i ? $1 : nil
|
468
468
|
|
@@ -553,13 +553,14 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
553
553
|
chr_transcript_ranges.each do |chr, transcript_ranges|
|
554
554
|
|
555
555
|
begin
|
556
|
+
raise "LRG, GL, HG, and HSCHR chromosomes not supported: #{chr}" if chr =~ /^(?:LRG_|GL0|HG|HSCHR)/
|
556
557
|
p = Organism.root
|
557
558
|
p.replace File.expand_path("./chromosome_#{chr}")
|
558
559
|
p.sub!(%r{.*/organisms/},'share/organisms/')
|
559
560
|
p = Path.setup(p, 'rbbt', Organism)
|
560
561
|
chr_str = p.produce.read
|
561
562
|
rescue Exception
|
562
|
-
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)
|
563
|
+
Log.debug("Chr #{ chr } failed (#{transcript_ranges.length} transcripts not covered)")
|
563
564
|
next
|
564
565
|
end
|
565
566
|
|
@@ -649,6 +650,7 @@ end
|
|
649
650
|
|
650
651
|
file 'transcript_3utr' => ["transcript_5utr"] do |t|
|
651
652
|
end
|
653
|
+
|
652
654
|
file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr", "transcript_phase", "transcript_sequence"] do |t|
|
653
655
|
transcript_5utr = TSV.open(File.expand_path('./transcript_5utr'), :unnamed => true)
|
654
656
|
transcript_3utr = TSV.open(File.expand_path('./transcript_3utr'), :unnamed => true)
|
@@ -676,142 +678,3 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
676
678
|
Open.write(t.name, protein_sequence.to_s)
|
677
679
|
end
|
678
680
|
end
|
679
|
-
|
680
|
-
#{{{ OLD
|
681
|
-
|
682
|
-
#file 'transcript_phase' do |t|
|
683
|
-
# tsv = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Phase"], :type => :single, :cast => :to_i)
|
684
|
-
#
|
685
|
-
# transcript_cds_start = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, [['CDNA Start','cds_start']], [], nil, :type => :flat, :namespace => $namespace)
|
686
|
-
# transcript_cds_start.through do |transcript, values|
|
687
|
-
# phase = values.compact.reject{|p| p.empty?}.select{|p| p == "1" or p == "2"}.first
|
688
|
-
# tsv[transcript] = phase.to_i unless phase.nil?
|
689
|
-
# end
|
690
|
-
#
|
691
|
-
# File.open(t.name, 'w') do |f| f.puts tsv end
|
692
|
-
#end
|
693
|
-
#
|
694
|
-
#file 'transcript_3utr' do |t|
|
695
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
696
|
-
#
|
697
|
-
# File.open(t.name, 'w') do |f|
|
698
|
-
# f.puts "#: :type=:single#cast=to_i"
|
699
|
-
# f.puts "#Ensembl Transcript ID\t3' UTR Length"
|
700
|
-
# utrs.each do |seq,trans|
|
701
|
-
# trans.each do |tran|
|
702
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
703
|
-
# end
|
704
|
-
# end
|
705
|
-
# end
|
706
|
-
#end
|
707
|
-
#
|
708
|
-
#file 'transcript_5utr' do |t|
|
709
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
710
|
-
#
|
711
|
-
# File.open(t.name, 'w') do |f|
|
712
|
-
# f.puts "#: :type=:single#cast=to_i"
|
713
|
-
# f.puts "#Ensembl Transcript ID\t5' UTR Length"
|
714
|
-
# utrs.each do |seq,trans|
|
715
|
-
# trans.each do |tran|
|
716
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
717
|
-
# end
|
718
|
-
# end
|
719
|
-
# end
|
720
|
-
#end
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
#file 'transcript_sequence' do |t|
|
725
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
726
|
-
#
|
727
|
-
# File.open(t.name, 'w') do |f|
|
728
|
-
# f.puts "#: :type=:single"
|
729
|
-
# f.puts "#Ensembl Transcript ID\tTranscript Sequence"
|
730
|
-
# sequences.each do |seq, genes|
|
731
|
-
# genes.each do |gene|
|
732
|
-
# f.write gene
|
733
|
-
# f.write "\t"
|
734
|
-
# f.write seq
|
735
|
-
# f.write "\n"
|
736
|
-
# end
|
737
|
-
# end
|
738
|
-
# end
|
739
|
-
#end
|
740
|
-
|
741
|
-
#file 'transcript_phase' do |t|
|
742
|
-
# tsv = TSV.setup({}, :key_field => "Ensembl Transcript ID", :fields => ["Phase"], :type => :single, :cast => :to_i)
|
743
|
-
#
|
744
|
-
# transcript_cds_start = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, [['CDNA Start','cds_start']], [], nil, :type => :flat, :namespace => $namespace)
|
745
|
-
# transcript_cds_start.through do |transcript, values|
|
746
|
-
# phase = values.compact.reject{|p| p.empty?}.select{|p| p == "1" or p == "2"}.first
|
747
|
-
# tsv[transcript] = phase.to_i unless phase.nil?
|
748
|
-
# end
|
749
|
-
#
|
750
|
-
# File.open(t.name, 'w') do |f| f.puts tsv end
|
751
|
-
#end
|
752
|
-
#
|
753
|
-
#file 'transcript_3utr' do |t|
|
754
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_3utr, [], nil, :type => :flat, :namespace => $namespace)
|
755
|
-
#
|
756
|
-
# File.open(t.name, 'w') do |f|
|
757
|
-
# f.puts "#: :type=:single#cast=to_i"
|
758
|
-
# f.puts "#Ensembl Transcript ID\t3' UTR Length"
|
759
|
-
# utrs.each do |seq,trans|
|
760
|
-
# trans.each do |tran|
|
761
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
762
|
-
# end
|
763
|
-
# end
|
764
|
-
# end
|
765
|
-
#end
|
766
|
-
#
|
767
|
-
#file 'transcript_5utr' do |t|
|
768
|
-
# utrs = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_5utr, [], nil, :type => :flat, :namespace => $namespace)
|
769
|
-
#
|
770
|
-
# File.open(t.name, 'w') do |f|
|
771
|
-
# f.puts "#: :type=:single#cast=to_i"
|
772
|
-
# f.puts "#Ensembl Transcript ID\t5' UTR Length"
|
773
|
-
# utrs.each do |seq,trans|
|
774
|
-
# trans.each do |tran|
|
775
|
-
# f.puts [tran, seq.length] * "\t" if seq =~ /^[ACTG]+$/
|
776
|
-
# end
|
777
|
-
# end
|
778
|
-
# end
|
779
|
-
#end
|
780
|
-
|
781
|
-
#file 'transcript_sequence' do |t|
|
782
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
783
|
-
#
|
784
|
-
# File.open(t.name, 'w') do |f|
|
785
|
-
# f.puts "#: :type=:single"
|
786
|
-
# f.puts "#Ensembl Transcript ID\tTranscript Sequence"
|
787
|
-
# sequences.each do |seq, genes|
|
788
|
-
# genes.each do |gene|
|
789
|
-
# f.write gene
|
790
|
-
# f.write "\t"
|
791
|
-
# f.write seq
|
792
|
-
# f.write "\n"
|
793
|
-
# end
|
794
|
-
# end
|
795
|
-
# end
|
796
|
-
#end
|
797
|
-
#file 'protein_sequence' => 'chromosomes' do |t|
|
798
|
-
# #chromosomes = TSV.open(t.prerequisites.first).keys
|
799
|
-
# #sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace, :chunk_filter => ['chromosome_name', chromosomes])
|
800
|
-
# sequences = BioMart.tsv($biomart_db, $biomart_ensembl_protein, $biomart_protein_sequence, [], nil, :type => :flat, :namespace => $namespace)
|
801
|
-
#
|
802
|
-
# File.open(t.name, 'w') do |f|
|
803
|
-
# f.puts "#: :type=:single"
|
804
|
-
# f.puts "#Ensembl Protein ID\tProtein Sequence"
|
805
|
-
# sequences.each do |seq, genes|
|
806
|
-
# genes.each do |gene|
|
807
|
-
# f.write gene
|
808
|
-
# f.write "\t"
|
809
|
-
# f.write seq
|
810
|
-
# f.write "\n"
|
811
|
-
# end
|
812
|
-
# end
|
813
|
-
# end
|
814
|
-
#end
|
815
|
-
|
816
|
-
|
817
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|