rbbt-sources 3.1.16 → 3.1.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: feef731c1d37abc9fcaf618086634f4bebde968c
4
- data.tar.gz: 58d28f8048a043d406c0cc1f72fe3b44186ead5e
3
+ metadata.gz: bf8a7403301523a87252057d4be1ac9e0f348708
4
+ data.tar.gz: '0939814d7b68cb713e05b5f1af2a572a8fffee75'
5
5
  SHA512:
6
- metadata.gz: b5fa5a78fe7e152691a92b6119202fdb0066fcf7377bafeb907b02519477221762c8b563239c26024700ee48f301e4377527f95441d87cadb50a26bc884dc626
7
- data.tar.gz: cdb897373677da35b65aa93964af005ac1048a4097643a4ae0b1a67024f67dc90da5204b5eea928429039cc63c9da275c6faf3b99cc920f3e46ca893c8f3dcab
6
+ metadata.gz: 779c0aa427ee0335a2533533179e05ef0c42c543316ea7cd9eb3c5b793f6603ea46cf63bffba71df9aed9cec0fc08df984255c1c3a83fe85c9bb5b51048395d1
7
+ data.tar.gz: 8a78a7bc1e4ba4819849c0375a1d6744e20bce89cdaf91e1c21cd26d4541f2ff91bd1d5aa9bed05e38b211051cc51774df2b0d2eb198bd4500d94c95196d48ac
@@ -1,5 +1,6 @@
1
1
  may2009
2
2
  may2012
3
+ sep2013
3
4
  jan2013
4
5
  feb2014
5
6
  dec2013
@@ -0,0 +1,177 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the CASCADE network, whose source tables are read
# from a clone of the asmundf/cascade Bitbucket repository (see URL).
#
# Claimed files:
# * interactions  - cascade.tsv re-keyed by ENTITYA, with ENTITYB as first field
# * members       - cascade_translation.tsv loaded as a flat TSV
# * paradigm      - entity declarations followed by typed associations
# * topology.sif  - "source -> target" / "source -| target" lines
# * output_nodes  - sign (+1 / -1) of nodes that target Prosurvival / Antisurvival
module CASCADE
  extend Resource
  self.subdir = 'share/databases/CASCADE'

  #def self.organism(org="Hsa")
  #  Organism.default_code(org)
  #end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  URL = 'https://bitbucket.org/asmundf/cascade'

  # Clones the CASCADE repository into a temporary location and returns an
  # open stream on +file+, a path relative to the repository root.
  #
  # The stream is opened while still inside the temporary directory and handed
  # back after the TmpFile block finishes, exactly as the claims did inline.
  #
  # Raises RuntimeError when `git clone` exits unsuccessfully; backticks do not
  # raise on a non-zero exit status, so without this check the failure would
  # only show up as a missing-file error on the following open.
  def self.open_repo_file(file)
    stream = nil
    TmpFile.with_file do |tmp|
      Misc.in_dir tmp do
        Log.warn "Please enter bitbucket credentials to access the asmundf/cascade repo"
        `git clone #{URL}`
        raise "Could not clone #{URL} (git exit status: #{$?.exitstatus.inspect})" unless $?.success?
        stream = Open.open(File.join("cascade", file))
      end
    end
    stream
  end

  # Interaction table keyed by ENTITYA.
  CASCADE.claim CASCADE.interactions, :proc do
    tsv = TSV.open(CASCADE.open_repo_file("cascade.tsv"), :merge => true, :header_hash => '')

    # Move ENTITYB to the front of the field list, keeping the other fields in order
    new_fields = ["ENTITYB"] + (tsv.fields - ["ENTITYB"])
    tsv = tsv.reorder :key, new_fields

    tsv.key_field = "ENTITYA (Associated Gene Name)"
    tsv.rename_field "ENTITYB", "ENTITYB (Associated Gene Name)"

    # Keep only the digit runs of each PMID value, joined with ';;'
    tsv.process "PMID" do |values|
      values.collect { |v| v.scan(/\d+/) * ";;" }
    end

    tsv.to_s
  end

  # Translation table loaded as a flat TSV; values are split on commas or
  # periods followed by optional whitespace.
  CASCADE.claim CASCADE.members, :proc do
    TSV.open(CASCADE.open_repo_file("cascade_translation.tsv"),
             :merge => true, :header_hash => '', :type => :flat, :sep2 => /[,.]\s*/)
  end

  # Entity declarations ("protein", "abstract", "complex", "family") followed
  # by one tab-separated line per association: source, target, type + symbol.
  CASCADE.claim CASCADE.paradigm, :proc do
    tsv = CASCADE.interactions.tsv
    members = CASCADE.members.tsv

    proteins = Set.new members.values.flatten
    outputs = Set.new
    associations = {}

    tsv.through do |source, values|
      values.zip_fields.each do |target, typea, _ida, _databasea, typeb, _idb, _databaseb, effect|
        next if typea == 'gene'

        if typeb == 'gene'
          # Gene targets lose their first '_g' (modified in place)
          target.sub!('_g', '')
          type = '-t'
        elsif typeb == 'output' || typea == 'output'
          type = '-ap'
        else
          type = '-a'
        end

        # Names containing '_f' or '_c' are declared from the members table below
        proteins << source unless source.include?('_f') || source.include?('_c')
        proteins << target unless target.include?('_f') || target.include?('_c')

        outputs << source if typea == 'output'
        outputs << target if typeb == 'output'

        effect_symbol = effect.include?('inhibit') ? '|' : '>'

        # A later row for the same (source, target) pair replaces the earlier one
        associations[[source, target]] = [type, effect_symbol]
      end
    end

    str = StringIO.new

    proteins.each do |p|
      next if outputs.include? p
      str.puts ["protein", p] * "\t"
    end

    outputs.each do |o|
      str.puts ["abstract", o] * "\t"
    end

    members.each do |e, targets|
      e = e.dup
      case
      when e.include?('_c')
        str.puts ["complex", e] * "\t"
        type = 'component'
      when e.include?('_f')
        str.puts ["family", e] * "\t"
        type = 'member'
      else
        next
      end

      targets.each do |target|
        associations[[target, e]] = [type, '>']
      end
    end

    associations.each do |(source, target), (type, symbol)|
      str.puts [source, target, [type, symbol] * ""] * "\t"
    end

    str.rewind
    str
  end

  # One "source -> target" line per interaction, or "source -| target" when the
  # effect text contains 'inhibit'.
  CASCADE.claim CASCADE["topology.sif"], :proc do
    tsv = CASCADE.interactions.tsv

    str = StringIO.new

    tsv.through do |source, values|
      values.zip_fields.each do |target, _typea, _ida, _databasea, _typeb, _idb, _databaseb, effect|
        effect_symbol = effect.include?('inhibit') ? '-|' : '->'

        str.puts [source, effect_symbol, target] * " "
      end
    end

    str.rewind
    str
  end

  # Single-value TSV (Node => Sign): -1 for sources targeting "Antisurvival",
  # 1 for sources targeting "Prosurvival"; the last matching row wins.
  CASCADE.claim CASCADE.output_nodes, :proc do
    tsv = CASCADE.interactions.tsv

    output = TSV.setup({}, :key_field => "Node", :fields => ["Sign"], :type => :single)

    tsv.through do |source, values|
      values.zip_fields.each do |target, *_rest|
        case target
        when "Antisurvival"
          output[source] = -1
        when "Prosurvival"
          output[source] = 1
        end
      end
    end

    output.to_s
  end
end
171
+
172
# When this file is executed directly, produce every claimed file and print
# where it was found.
if __FILE__ == $0
  iif CASCADE.interactions.produce.find
  iif CASCADE.members.produce.find
  iif CASCADE.paradigm.produce.find
  iif CASCADE["topology.sif"].produce.find
  iif CASCADE.output_nodes.produce(true).find
end
177
+
@@ -0,0 +1,59 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the Phospho.ELM dump.
#
# The dump itself must be placed by hand (see the message raised by the
# ".source/dump.tgz" claim); `data` is derived from it.
module PhosphoELM
  extend Resource
  self.subdir = 'share/databases/PhosphoELM'

  # Returns Organism.default_code for +org+; the organism library is loaded on
  # first use.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The dump cannot be fetched automatically: tell the user where to put it.
  PhosphoELM.claim PhosphoELM[".source/dump.tgz"], :proc do |file|
    raise "Place phosphoELM_all_latest.dump.tgz from http://phospho.elm.eu.org at #{file}. Please consult license."
  end

  # Builds a list TSV keyed by "Phosphosite" ("<Ensembl protein>:<code><position>")
  # with fields Kinases, Source and PMID, from the "Homo sapiens" lines of the dump.
  PhosphoELM.claim PhosphoELM.data, :proc do
    archive = PhosphoELM[".source/dump.tgz"].produce.find

    organism_code = PhosphoELM.organism
    uniprot_to_ensp = Organism.identifiers(organism_code).tsv :key_field => "UniProt/SwissProt Accession", :fields => ["Ensembl Protein ID"], :type => :flat, :persist => true
    ensp_to_sequence = Organism.protein_sequence(organism_code).tsv :persist => true

    dumper = TSV::Dumper.new(:key_field => "Phosphosite", :fields => ["Kinases", "Source", "PMID"], :type => :list)
    dumper.init

    TmpFile.with_file do |workdir|
      Misc.in_dir workdir do
        CMD.cmd("tar xvfz #{archive}")
        dump_file = Dir.glob("*.dump").first

        TSV.traverse Open.open(dump_file), :type => :array, :into => dumper do |line|
          next unless line =~ /Homo sapiens/

          acc, sequence, position, code, pmids, kinases, source, _species, _entry_date = line.split("\t")

          ensps = uniprot_to_ensp[acc]
          if ensps.nil?
            Log.warn "No Ensembl Protein ID for #{acc}"
            next
          end

          # The dump sequence is compared with a "*" appended to it
          sequence << "*"
          matching = ensps.select { |ensp| sequence == ensp_to_sequence[ensp] }
          if matching.empty?
            Log.warn "No sequence match for #{acc} - #{ensps * ", "}"
            next
          end

          # One entry per matching protein, all sharing the same annotation
          entries = matching.collect do |ensp|
            ["#{ensp}:#{code}#{position}", [kinases, source, pmids]]
          end
          entries.extend MultipleResult

          entries
        end
      end
    end

    dumper.stream
  end
end
57
+
58
# When this file is executed directly, (re)produce the data file and print
# where it was found.
if __FILE__ == $0
  iif PhosphoELM.data.produce(true).find
end
59
+
@@ -0,0 +1,39 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the PhosphoSitePlus downloads.
#
# NOTE(review): the module and subdir are spelled "PhosphoSitePlues"; the
# spelling is kept because renaming would break existing callers and paths.
module PhosphoSitePlues
  extend Resource
  self.subdir = 'share/databases/PhosphoSitePlues'

  # Returns Organism.default_code for +org+; the organism library is loaded on
  # first use.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # Names of the source files that must be placed by hand under ".source".
  # This must be a %w word array: a plain %( ) literal is a single String, and
  # the +each+ call below would raise NoMethodError on it.
  ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
                 Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
                 O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
                 Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
                 Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
                 Ubiquitination_site_dataset.gz)

  # None of the source files can be fetched automatically: each claim tells
  # the user which file to place and where.
  ALL_FILES.each do |file|
    PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
      raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
    end
  end

  # NOTE(review): this producer returns the path of the source file, not its
  # content or an open stream - confirm that this is what Resource expects.
  PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
    PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
  end
end
38
+
39
# When this file is executed directly, produce the derived file and print
# where it was found. `kinase_substrate` is the only derived resource this
# module claims; `data`, used previously, is never claimed.
iif PhosphoSitePlues.kinase_substrate.produce.find if __FILE__ == $0
@@ -1,27 +1,53 @@
1
- require 'rbbt'
1
+ require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
3
 
4
4
  module Reactome
5
5
  extend Resource
6
6
  self.subdir = "share/databases/Reactome"
7
7
 
8
+ def self.organism(org="Hsa")
9
+ require 'rbbt/sources/organism'
10
+ Organism.default_code(org)
11
+ end
12
+
8
13
  Reactome.claim Reactome.protein_pathways, :proc do
9
14
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
10
- url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
11
- tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
12
- tsv.key_field = "UniProt/SwissProt Accession"
15
+ url = "http://reactome.org/download/current/Ensembl2Reactome.txt"
16
+ tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
17
+ tsv.key_field = "Ensembl Gene ID"
18
+ tsv.fields = ["Reactome Pathway ID"]
19
+ tsv.namespace = Reactome.organism
20
+ tsv.to_s
21
+ end
22
+
23
+ Reactome.claim Reactome.protein_pathways_all, :proc do
24
+ #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
25
+ url = "http://reactome.org/download/current/Ensembl2Reactome_All_Levels.txt"
26
+ tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
27
+ tsv.key_field = "Ensembl Gene ID"
13
28
  tsv.fields = ["Reactome Pathway ID"]
14
- tsv.namespace = "Hsa"
29
+ tsv.namespace = Reactome.organism
15
30
  tsv.to_s
16
31
  end
17
32
 
33
+
18
34
  Reactome.claim Reactome.pathway_names, :proc do
19
35
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
20
36
  url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
21
- tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
37
+ tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [3], :type => :single)
22
38
  tsv.key_field = "Reactome Pathway ID"
23
39
  tsv.fields = ["Pathway Name"]
24
- tsv.namespace = "Hsa"
40
+ tsv.namespace = Reactome.organism
41
+ tsv.to_s
42
+ end
43
+
44
+ Reactome.claim Reactome.pathway_pathway, :proc do
45
+ #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
46
+ url = "http://reactome.org/download/current/ReactomePathwaysRelation.txt"
47
+ tsv = TSV.open(Open.open(url), :type => :flat, :merge => true)
48
+ tsv.key_field = "Reactome Pathway ID"
49
+ tsv.fields = ["Reactome Pathway ID"]
50
+ tsv.namespace = Reactome.organism
25
51
  tsv.to_s
26
52
  end
27
53
 
@@ -30,10 +56,39 @@ module Reactome
30
56
  tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
31
57
  tsv.key_field = "UniProt/SwissProt Accession"
32
58
  tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
33
- tsv.namespace = "Hsa"
59
+ tsv.namespace = Reactome.organism
34
60
  tsv.to_s
35
61
  end
36
62
 
63
+ #Reactome.claim Reactome.protein_pathways, :proc do
64
+ # #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
65
+ # url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
66
+ # tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
67
+ # tsv.key_field = "UniProt/SwissProt Accession"
68
+ # tsv.fields = ["Reactome Pathway ID"]
69
+ # tsv.namespace = Reactome.organism
70
+ # tsv.to_s
71
+ #end
72
+
73
+ #Reactome.claim Reactome.pathway_names, :proc do
74
+ # #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
75
+ # url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
76
+ # tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
77
+ # tsv.key_field = "Reactome Pathway ID"
78
+ # tsv.fields = ["Pathway Name"]
79
+ # tsv.namespace = Reactome.organism
80
+ # tsv.to_s
81
+ #end
82
+
83
+ #Reactome.claim Reactome.protein_protein, :proc do
84
+ # url = "http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"
85
+ # tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
86
+ # tsv.key_field = "UniProt/SwissProt Accession"
87
+ # tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
88
+ # tsv.namespace = Reactome.organism
89
+ # tsv.to_s
90
+ #end
91
+
37
92
  end
38
93
 
39
94
  if defined? Entity
@@ -82,3 +137,8 @@ if defined? Entity
82
137
  end
83
138
  end
84
139
  end
140
+
141
# When this file is executed directly, produce each table and log it.
if __FILE__ == $0
  Log.tsv Reactome.protein_pathways.produce.tsv
  Log.tsv Reactome.protein_pathways_all.produce.tsv
  Log.tsv Reactome.pathway_names.produce(true).tsv
  Log.tsv Reactome.pathway_pathway.produce.tsv
end
@@ -0,0 +1,77 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the Signor database.
#
# The CSV export must be placed by hand (see the message raised by the
# ".source/all.csv" claim); `data` and `protein_protein` are derived from it.
module Signor
  extend Resource
  self.subdir = 'share/databases/Signor'

  # Returns Organism.default_code for +org+; the organism library is loaded on
  # first use.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The export cannot be fetched automatically: tell the user where to put it.
  Signor.claim Signor[".source/all.csv"], :proc do |file|
    raise "Download all human data in CSV format from 'http://signor.uniroma2.it/downloads.php#all_download' and place in #{file}"
  end

  # Parses the ';'-separated export into a double TSV.
  #
  # Before parsing, the stream goes through Misc.remove_quoted_new_line, and
  # ';' inside quoted values is swapped for a placeholder so it is not taken
  # as a separator; the placeholder is restored once the table is loaded.
  Signor.claim Signor.data, :proc do
    sio = Signor[".source/all.csv"].open
    io_tmp = Misc.remove_quoted_new_line(sio)
    io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')

    tsv = TSV.open io, :header_hash => "", :sep => ";", :merge => true, :type => :double, :zipped => true, :monitor => true
    tsv.each do |_key, values|
      clean_values = values.collect do |vs|
        vs.collect do |v|
          # Strip the surrounding quotes only when the value both starts AND
          # ends with one. This used to assign to v[-1] instead of comparing
          # it, which overwrote and then dropped the last character of any
          # value that merely started with a quote.
          unquoted = (v[0] == '"' && v[-1] == '"') ? v[1..-2] : v
          unquoted.gsub("--SEMICOLON--", ';')
        end
      end

      values.replace clean_values
    end
    tsv
  end

  # Protein-to-protein relations extracted from `data`: one entry per target
  # whose TYPEB is "protein", for rows whose first TYPEA is "protein". The
  # source is the first IDA value of the row.
  Signor.claim Signor.protein_protein, :proc do
    parser = TSV::Parser.new Signor.data
    fields = parser.fields
    dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (UniProt/SwissProt Accession)", "Effect", "Mechanism", "Residue"], :type => :double, :organism => Signor.organism
    dumper.init
    TSV.traverse parser, :into => dumper do |_key, values|
      info = Hash[fields.zip(values)]

      next unless info["TYPEA"].first == "protein"
      unia = info["IDA"].first

      res = []
      res.extend MultipleResult

      # Walk the per-target columns in parallel
      info["TYPEB"].zip(info["IDB"], info["EFFECT"], info["MECHANISM"], info["RESIDUE"]).each do |typeb, idb, eff, mech, resi|
        next unless typeb == "protein"
        res << [unia, [idb, eff, mech, resi]]
      end

      res
    end

    Misc.collapse_stream dumper.stream
  end
end
75
+
76
# When this file is executed directly, (re)produce the protein-protein file
# and print where it was found.
if __FILE__ == $0
  iif Signor.protein_protein.produce(true).find
end
77
+
@@ -1,5 +1,15 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-89 may2017
4
+ release-88 mar2017
5
+ release-87 dec2016
6
+ release-86 oct2016
7
+ release-85 jul2016
8
+ release-84 mar2016
9
+ release-83 dec2015
10
+ release-82 sep2015
11
+ release-81 jul2015
12
+ release-80 may2015
3
13
  release-79 mar2015
4
14
  release-78 dec2014
5
15
  release-77 oct2014
@@ -672,6 +672,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
672
672
  transcript = transcript2ensembl[transcript_id]
673
673
  protein = transcript_protein[transcript]
674
674
 
675
+ next if transcript =~ /^LRG/
676
+
675
677
  start_exon = exon2ensembl[start_exon]
676
678
  eend_exon = exon2ensembl[eend_exon]
677
679
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.16
4
+ version: 3.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-15 00:00:00.000000000 Z
11
+ date: 2017-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -89,6 +89,7 @@ files:
89
89
  - etc/allowed_biomart_archives
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
+ - lib/rbbt/sources/CASCADE.rb
92
93
  - lib/rbbt/sources/COSTART.rb
93
94
  - lib/rbbt/sources/CTCAE.rb
94
95
  - lib/rbbt/sources/HPRD.rb
@@ -115,10 +116,13 @@ files:
115
116
  - lib/rbbt/sources/organism.rb
116
117
  - lib/rbbt/sources/pfam.rb
117
118
  - lib/rbbt/sources/pharmagkb.rb
119
+ - lib/rbbt/sources/phospho_ELM.rb
120
+ - lib/rbbt/sources/phospho_site_plus.rb
118
121
  - lib/rbbt/sources/pina.rb
119
122
  - lib/rbbt/sources/polysearch.rb
120
123
  - lib/rbbt/sources/pubmed.rb
121
124
  - lib/rbbt/sources/reactome.rb
125
+ - lib/rbbt/sources/signor.rb
122
126
  - lib/rbbt/sources/stitch.rb
123
127
  - lib/rbbt/sources/string.rb
124
128
  - lib/rbbt/sources/synapse.rb