rbbt-sources 3.0.36 → 3.0.37

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6dc886fa1c1e4052a8c5dee351f46e2c2f536e39
4
- data.tar.gz: 7a9722d19761ed23467f5f812ebb18787aeee534
3
+ metadata.gz: 27d9b524e74122ec28214154aae2646350075b34
4
+ data.tar.gz: 5a41416cadec47a6a65b5dc0fd15f836d8aaf27c
5
5
  SHA512:
6
- metadata.gz: 7da4beb09820957334fed2d9bfaa8af9da5b206fd3f4916ca357ab48224eb266c6711b72d920eb83cdb1f9ef2080db9665515e0bfa86ac7c7c557dd427c5f17f
7
- data.tar.gz: 3576bde597dd51ed3af48c20f8bcf46c57a3ae3e240bf38cc1e5e6b4173c0d9557c1375646b1de9534f2a76dac3a32fd692c84f1404c60a961ec00a372c5b98d
6
+ metadata.gz: f7479d80768eecbd609d4bb15907a36eb142d693ce185122fe7c4fe90d8b66cafa10aa0454dabc22db0a6ef4f73500d60752ef6f317b81c495929bfb7756bd2b
7
+ data.tar.gz: c9227fd124bad413ca05292702b3adcd9fe10bacbbb242022621c64c9fa63cf14ff218158dc93a48b6805e27872fb06e22f24f9cc00555896a12ef7566999037
@@ -0,0 +1,57 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+ require 'rbbt/sources/organism'
4
+
5
+ module MSigDB
6
+ extend Resource
7
+ self.subdir = 'share/databases/MSigDB'
8
+
9
+ def self.organism(org="Hsa")
10
+ Organism.default_code(org)
11
+ end
12
+
13
+ #self.search_paths = {}
14
+ #self.search_paths[:default] = :lib
15
+
16
+ MSigDB.claim MSigDB['.source/all_sets.zip'], :proc do |filename|
17
+ raise "Download the 'ZIPed file set' from http://software.broadinstitute.org/gsea/downloads.jsp into #{filename}"
18
+ end
19
+
20
+ MSigDB.claim MSigDB.all_sets, :proc do |dirname|
21
+ zip_file = MSigDB['.source/all_sets.zip'].produce.find
22
+ TmpFile.with_dir do |tmpdir|
23
+ Misc.unzip_in_dir(zip_file, tmpdir)
24
+ Path.setup(tmpdir)
25
+ tmpdir.glob('**/*symbols.gmt').each do |file|
26
+ name_parts = File.basename(file).split(".")
27
+ base_name = name_parts[0..-5] * "_"
28
+ dumper = TSV::Dumper.new :key_field => "MSigDB Geneset ID", :fields => ["Associated Gene Name"], :namespace => MSigDB.organism, :type => :flat
29
+ dumper.init
30
+ io = TSV.traverse file, :type => :array, :into => dumper do |line|
31
+ name, url, *genes = line.split("\t")
32
+ [name, genes]
33
+ end
34
+ Open.write(dirname[base_name], io.stream)
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ if defined? Entity
41
+
42
+ module MSigDBGeneSet
43
+ extend Entity
44
+ self.format= "MSigDB Geneset ID"
45
+
46
+ property :name => :single2array do
47
+ self.downcase.gsub("_",' ')
48
+ end
49
+
50
+ property :genes => :single2array do
51
+ @@pathway_genes ||= MSigDB.all_sets.msigdb.tsv :persist => true
52
+ genes = @@pathway_genes[self]
53
+ Gene.setup(genes, "Associated Gene Name", MSigDB.organism).ensembl
54
+ end
55
+ end
56
+ end
57
+
@@ -72,7 +72,7 @@ module UniProt
72
72
  def self.get_uniprot_sequence(uniprotids)
73
73
  _array = Array === uniprotids
74
74
 
75
- uniprotids = [uniprotids] unless Array === uniprotids
75
+ uniprotids = [uniprotids] unless _array
76
76
  uniprotids = uniprotids.compact.collect{|id| id}
77
77
 
78
78
  result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids|
@@ -48,6 +48,10 @@ $biomart_transcript_biotype = [
48
48
  ["Ensembl Transcript Biotype", 'transcript_biotype'],
49
49
  ]
50
50
 
51
+ $biomart_transcript_name = [
52
+ ["Ensembl Transcript Name", 'external_transcript_id'],
53
+ ]
54
+
51
55
 
52
56
  $biomart_protein_sequence = [
53
57
  ['Protein Sequence','peptide'],
@@ -442,6 +446,12 @@ file 'transcript_biotype' do |t|
442
446
  Misc.sensiblewrite(t.name, biotype.to_s)
443
447
  end
444
448
 
449
+ file 'transcript_name' do |t|
450
+ biotype = BioMart.tsv($biomart_db, $biomart_ensembl_transcript, $biomart_transcript_name, [], nil, :type => :single, :namespace => Thread.current['namespace'])
451
+
452
+ Misc.sensiblewrite(t.name, biotype.to_s)
453
+ end
454
+
445
455
  file 'gene_pfam' do |t|
446
456
  pfam = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_pfam, [], nil, :type => :double, :namespace => Thread.current['namespace'])
447
457
 
@@ -700,3 +710,46 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
700
710
 
701
711
  Misc.sensiblewrite(t.name, protein_sequence.to_s)
702
712
  end
713
+
714
+ file 'ensembl2uniprot' => ["protein_sequence", "protein_identifiers"] do |t|
715
+ ensp2unis = TSV.open(File.expand_path('./protein_identifiers'), :key_field => "Ensembl Protein ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :merge => true, :unnamed => true)
716
+ dumper = TSV::Dumper.new :key_field => "Ensembl Protein ID", :fields => ["UniProt/SwissProt Accession"], :namespace => Thread.current['namespace'], :type => :single
717
+ dumper.init
718
+ require 'rbbt/sources/uniprot'
719
+ TSV.traverse File.expand_path('./protein_sequence'), :into => dumper, :cpus => 20, :bar => true do |ensp,ensp_seq|
720
+ ensp = ensp.first if Array === ensp
721
+ unis = ensp2unis[ensp]
722
+ next if unis.nil? or unis.empty?
723
+ uni_seqs = UniProt.get_uniprot_sequence(unis)
724
+ best_uni = unis.zip(uni_seqs).sort_by do |uni,uni_seq|
725
+ (ensp_seq.length - uni_seq.length).abs
726
+ end.first.first
727
+ [ensp, best_uni]
728
+ end
729
+ Misc.sensiblewrite(t.name, dumper.stream)
730
+ end
731
+
732
+ file 'uniprot2ensembl' => ["protein_sequence", "protein_identifiers"] do |t|
733
+ uni2ensps = TSV.open(File.expand_path('./protein_identifiers'), :fields => ["Ensembl Protein ID"], :key_field => "UniProt/SwissProt Accession", :type => :flat, :merge => true, :unnamed => true)
734
+ ensp2seq = TSV.open(File.expand_path('./protein_sequence'), :unnamed => true)
735
+ dumper = TSV::Dumper.new :fields => ["Ensembl Protein ID"], :key_field => "UniProt/SwissProt Accession", :namespace => Thread.current['namespace'], :type => :single
736
+ dumper.init
737
+ require 'rbbt/sources/uniprot'
738
+ all_uni = TSV.open(File.expand_path('./protein_identifiers'), :key_field => "UniProt/SwissProt Accession", :fields => [], :type => :double, :merge => true, :unnamed => true).keys.compact.reject{|u| u.empty?}
739
+ TSV.traverse all_uni, :into => dumper, :cpus => 1, :bar => true do |uni|
740
+ uni = uni.first if Array === uni
741
+ uni_seq = UniProt.get_uniprot_sequence(uni)
742
+ ensps = uni2ensps[uni]
743
+ next if ensps.nil? or ensps.empty?
744
+ best_ensp = ensps.sort_by do |ensp|
745
+ ensp_seq = ensp2seq[ensp]
746
+ if ensp_seq
747
+ (ensp_seq.length - uni_seq.length).abs
748
+ else
749
+ uni_seq.length
750
+ end
751
+ end.first
752
+ [uni, best_ensp]
753
+ end
754
+ Misc.sensiblewrite(t.name, dumper.stream)
755
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.36
4
+ version: 3.0.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -92,6 +92,7 @@ files:
92
92
  - lib/rbbt/sources/COSTART.rb
93
93
  - lib/rbbt/sources/CTCAE.rb
94
94
  - lib/rbbt/sources/HPRD.rb
95
+ - lib/rbbt/sources/MSigDB.rb
95
96
  - lib/rbbt/sources/NCI.rb
96
97
  - lib/rbbt/sources/PSI_MI.rb
97
98
  - lib/rbbt/sources/STITCH.rb