rbbt-sources 3.1.29 → 3.1.30
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/COREAD_phospho_proteome.rb +183 -0
- data/lib/rbbt/sources/array_express.rb +17 -0
- data/lib/rbbt/sources/phospho_site_plus.rb +7 -7
- data/lib/rbbt/sources/pubmed.rb +19 -18
- data/lib/rbbt/sources/signor.rb +36 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
|
4
|
+
data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
|
7
|
+
data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# CITE: Genomic Determinants of Protein Abundance Variation in Colorectal
|
5
|
+
# Cancer Cells PMID: 28854368
|
6
|
+
#
|
7
|
+
# Roumeliotis TI, Williams SP, Gonçalves E, et al. Genomic Determinants of
|
8
|
+
# Protein Abundance Variation in Colorectal Cancer Cells. Cell Reports.
|
9
|
+
# 2017;20(9):2201-2214. doi:10.1016/j.celrep.2017.08.010.
|
10
|
+
|
11
|
+
module COREADPhosphoProteome
|
12
|
+
extend Resource
|
13
|
+
self.subdir = 'share/databases/COREADPhosphoProteome'
|
14
|
+
|
15
|
+
#def self.organism(org="Hsa")
|
16
|
+
# Organism.default_code(org)
|
17
|
+
#end
|
18
|
+
|
19
|
+
#self.search_paths = {}
|
20
|
+
#self.search_paths[:default] = :lib
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome[".source/mmc3.xlsx"], :url, "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5583477/bin/mmc3.xlsx"
|
25
|
+
|
26
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.data, :proc do
|
27
|
+
require 'rbbt/tsv/excel'
|
28
|
+
io = TSV.excel COREADPhosphoProteome[".source/mmc3.xlsx"].produce.find, :text => true
|
29
|
+
TSV.collapse_stream io
|
30
|
+
end
|
31
|
+
|
32
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_levels, :proc do
|
33
|
+
tsv = COREADPhosphoProteome.data.tsv
|
34
|
+
name, seq, site, kinases, name, *cell_lines = tsv.fields
|
35
|
+
tsv.add_field "Phosphosite" do |uni,values_list|
|
36
|
+
Misc.zip_fields(values_list).collect{|values|
|
37
|
+
name, seq, site, kinases, kegg_name, *vals = values
|
38
|
+
[name, site] * ":"
|
39
|
+
}
|
40
|
+
end
|
41
|
+
tsv.reorder "Phosphosite", cell_lines, :zipped => true
|
42
|
+
end
|
43
|
+
|
44
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_binary, :proc do
|
45
|
+
require 'rbbt/matrix'
|
46
|
+
require 'rbbt/matrix/barcode'
|
47
|
+
|
48
|
+
m = RbbtMatrix.new COREADPhosphoProteome.phosphosite_levels.find
|
49
|
+
a = m.to_activity(3).tsv(false)
|
50
|
+
a
|
51
|
+
end
|
52
|
+
|
53
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_present, :proc do
|
54
|
+
require 'rbbt/sources/signor'
|
55
|
+
signor = Signor.phospho_sites.tsv
|
56
|
+
|
57
|
+
signor.add_field "Fixed" do |k,l|
|
58
|
+
case l.uniq.length
|
59
|
+
when 1
|
60
|
+
l.first
|
61
|
+
when 2
|
62
|
+
"Unclear"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
signor = signor.slice("Fixed").to_single
|
66
|
+
|
67
|
+
parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
|
68
|
+
dumper = TSV::Dumper.new parser.options
|
69
|
+
dumper.init
|
70
|
+
cell_lines = parser.fields
|
71
|
+
TSV.traverse parser, :into => dumper do |site,values|
|
72
|
+
osite = site
|
73
|
+
site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
|
74
|
+
next unless signor.include? site
|
75
|
+
new_values = values.flatten.zip(cell_lines).collect{|value,cell_line|
|
76
|
+
next if signor[site] == "Unclear"
|
77
|
+
case value
|
78
|
+
when nil, ""
|
79
|
+
signor[site] == "Activates" ? -1 : 1
|
80
|
+
else
|
81
|
+
signor[site] == "Activates" ? 1 : -1
|
82
|
+
end
|
83
|
+
}
|
84
|
+
[site, new_values]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_100, :proc do
|
89
|
+
require 'rbbt/sources/signor'
|
90
|
+
signor = Signor.phospho_sites.tsv
|
91
|
+
|
92
|
+
signor.add_field "Fixed" do |k,l|
|
93
|
+
case l.uniq.length
|
94
|
+
when 1
|
95
|
+
l.first
|
96
|
+
when 2
|
97
|
+
"Unclear"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
signor = signor.slice("Fixed").to_single
|
101
|
+
|
102
|
+
parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
|
103
|
+
dumper = TSV::Dumper.new parser.options
|
104
|
+
dumper.init
|
105
|
+
TSV.traverse parser, :into => dumper do |site,values|
|
106
|
+
osite = site
|
107
|
+
site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
|
108
|
+
next unless signor.include? site
|
109
|
+
new_values = values.flatten.collect{|value|
|
110
|
+
next if signor[site] == "Unclear"
|
111
|
+
case value
|
112
|
+
when nil, ""
|
113
|
+
signor[site] == "Activates" ? -1 : 1
|
114
|
+
else
|
115
|
+
if value.to_f >= 100
|
116
|
+
signor[site] == "Activates" ? 1 : -1
|
117
|
+
else
|
118
|
+
signor[site] == "Activates" ? -1 : 1
|
119
|
+
end
|
120
|
+
end
|
121
|
+
}
|
122
|
+
[site, new_values]
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_levels, :proc do
|
127
|
+
require 'rbbt/sources/signor'
|
128
|
+
signor = Signor.phospho_sites.tsv
|
129
|
+
|
130
|
+
signor.add_field "Fixed" do |k,l|
|
131
|
+
case l.uniq.length
|
132
|
+
when 1
|
133
|
+
l.first
|
134
|
+
when 2
|
135
|
+
"Unclear"
|
136
|
+
end
|
137
|
+
end
|
138
|
+
signor = signor.slice("Fixed").to_single
|
139
|
+
|
140
|
+
|
141
|
+
parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_binary
|
142
|
+
dumper = TSV::Dumper.new parser.options
|
143
|
+
dumper.init
|
144
|
+
TSV.traverse parser, :into => dumper do |site,values|
|
145
|
+
osite = site
|
146
|
+
site = site.first if Array === site
|
147
|
+
site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
|
148
|
+
next unless signor.include? site
|
149
|
+
max = values.flatten.max
|
150
|
+
new_values = values.flatten.collect{|value|
|
151
|
+
next if signor[site] == "Unclear"
|
152
|
+
case value
|
153
|
+
when nil, ""
|
154
|
+
signor[site] == "Activates" ? -1 : 1
|
155
|
+
else
|
156
|
+
if value == max
|
157
|
+
signor[site] == "Activates" ? 1 : -1
|
158
|
+
else
|
159
|
+
signor[site] == "Activates" ? -1 : 1
|
160
|
+
end
|
161
|
+
end
|
162
|
+
}
|
163
|
+
[site, new_values]
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
COREADPhosphoProteome.claim COREADPhosphoProteome.cascade_levels, :proc do
|
168
|
+
require 'rbbt/sources/CASCADE'
|
169
|
+
|
170
|
+
cascade_proteins = CASCADE.members.tsv.values.flatten.compact.uniq
|
171
|
+
tsv = COREADPhosphoProteome.phosphosite_levels.tsv
|
172
|
+
tsv.select do |site,values|
|
173
|
+
cascade_proteins.include? site.split(":").first
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
iif COREADPhosphoProteome.data.produce.find if __FILE__ == $0
|
179
|
+
iif COREADPhosphoProteome.phosphosite_levels.produce.find if __FILE__ == $0
|
180
|
+
iif COREADPhosphoProteome.phosphosite_binary.produce.find if __FILE__ == $0
|
181
|
+
iif COREADPhosphoProteome.signor_activity_present.produce(true).find if __FILE__ == $0
|
182
|
+
iif COREADPhosphoProteome.cascade_levels.produce.find if __FILE__ == $0
|
183
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ArrayExpress
|
2
|
+
|
3
|
+
def self.tpm(code, organism = Organism.default_code("Hsa"))
|
4
|
+
url = "https://www.ebi.ac.uk/gxa/experiments-content/#{code}/resources/ExperimentDownloadSupplier.RnaSeqBaseline/tpms.tsv"
|
5
|
+
io = TSV.traverse Open.open(url), :type => :line, :into => :stream do |line|
|
6
|
+
next if line =~ /^#/
|
7
|
+
parts = line.split("\t")
|
8
|
+
line = parts[0] << "\t" << parts[2..-1] * "\t"
|
9
|
+
line = "#" + line if line =~ /Gene ID/
|
10
|
+
line
|
11
|
+
end
|
12
|
+
tsv = TSV.open(io, :type => :list, :cast => :to_f)
|
13
|
+
tsv.key_field = "Ensembl Gene ID"
|
14
|
+
tsv.namespace = organism
|
15
|
+
tsv
|
16
|
+
end
|
17
|
+
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
3
|
|
4
|
-
module
|
4
|
+
module PhosphoSitePlus
|
5
5
|
extend Resource
|
6
|
-
self.subdir = 'share/databases/
|
6
|
+
self.subdir = 'share/databases/PhosphoSitePlus'
|
7
7
|
|
8
8
|
def self.organism(org="Hsa")
|
9
9
|
require 'rbbt/sources/organism'
|
@@ -18,7 +18,7 @@ module PhosphoSitePlues
|
|
18
18
|
|
19
19
|
|
20
20
|
|
21
|
-
ALL_FILES = %(Acetylation_site_dataset.gz Disease-associated_sites.gz
|
21
|
+
ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
|
22
22
|
Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
|
23
23
|
O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
|
24
24
|
Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
|
@@ -26,14 +26,14 @@ Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
|
|
26
26
|
Ubiquitination_site_dataset.gz)
|
27
27
|
|
28
28
|
ALL_FILES.each do |file|
|
29
|
-
|
29
|
+
PhosphoSitePlus.claim PhosphoSitePlus[".source"][file], :proc do |f|
|
30
30
|
raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
34
|
+
PhosphoSitePlus.claim PhosphoSitePlus.kinase_substrate, :proc do
|
35
|
+
PhosphoSitePlus[".source/Kinase_Substrate_Dataset.gz"].produce
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
iif
|
39
|
+
iif PhosphoSitePlus.data.produce.find if __FILE__ == $0
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -228,26 +228,27 @@ module PubMed
|
|
228
228
|
result = {}
|
229
229
|
values = []
|
230
230
|
chunks = Misc.divide(ids, (ids.length / 20) + 1)
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
231
|
+
Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
|
232
|
+
chunks.each do |list|
|
233
|
+
begin
|
234
|
+
Misc.try3times do
|
235
|
+
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
236
|
+
|
237
|
+
postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
|
238
|
+
xml = TmpFile.with_file(postdata) do |postfile|
|
239
|
+
#Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
|
240
|
+
Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
|
241
|
+
end
|
242
|
+
|
243
|
+
values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
|
241
244
|
end
|
242
|
-
|
243
|
-
|
245
|
+
rescue Aborted
|
246
|
+
raise $!
|
247
|
+
rescue Exception
|
248
|
+
Log.exception $!
|
249
|
+
ensure
|
250
|
+
bar.tick
|
244
251
|
end
|
245
|
-
rescue Aborted
|
246
|
-
raise $!
|
247
|
-
rescue Exception
|
248
|
-
Log.exception $!
|
249
|
-
ensure
|
250
|
-
bar.tick
|
251
252
|
end
|
252
253
|
end
|
253
254
|
|
data/lib/rbbt/sources/signor.rb
CHANGED
@@ -100,7 +100,42 @@ module Signor
|
|
100
100
|
|
101
101
|
Misc.collapse_stream dumper.stream
|
102
102
|
end
|
103
|
+
|
104
|
+
Signor.claim Signor.phospho_sites, :proc do
|
105
|
+
uni2name = UniProt.identifiers.Hsa.index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
106
|
+
|
107
|
+
dumper = TSV::Dumper.new :key_field => "Phosphosite", :fields => ["Effect"], :type => :flat, :organism => Signor.organism
|
108
|
+
dumper.init
|
109
|
+
|
110
|
+
TSV.traverse Signor.protein_protein, :into => dumper, :bar => true do |source, values|
|
111
|
+
res = []
|
112
|
+
res.extend MultipleResult
|
113
|
+
Misc.zip_fields(values).each do |target, effect, mechanism, residue|
|
114
|
+
kinase = case mechanism
|
115
|
+
when "phosphorylation"
|
116
|
+
true
|
117
|
+
when "dephosphorylation"
|
118
|
+
false
|
119
|
+
else
|
120
|
+
next
|
121
|
+
end
|
122
|
+
name = uni2name[target]
|
123
|
+
next if name.nil?
|
124
|
+
next if residue.nil? or residue.empty?
|
125
|
+
site = [name, residue] * ":"
|
126
|
+
positive = effect.include? "up-regulates"
|
127
|
+
|
128
|
+
activates = kinase && positive || (!kinase && !positive)
|
129
|
+
|
130
|
+
res << [site, activates ? "Activates" : "Deactivates"]
|
131
|
+
end
|
132
|
+
res
|
133
|
+
end
|
134
|
+
|
135
|
+
TSV.collapse_stream(dumper)
|
136
|
+
end
|
103
137
|
end
|
104
138
|
|
105
|
-
iif Signor.tf_tg.produce
|
139
|
+
iif Signor.tf_tg.produce.find if __FILE__ == $0
|
140
|
+
iif Signor.phospho_sites.produce(true).find if __FILE__ == $0
|
106
141
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -90,6 +90,7 @@ files:
|
|
90
90
|
- etc/biomart/missing_in_archive
|
91
91
|
- etc/organisms
|
92
92
|
- lib/rbbt/sources/CASCADE.rb
|
93
|
+
- lib/rbbt/sources/COREAD_phospho_proteome.rb
|
93
94
|
- lib/rbbt/sources/COSTART.rb
|
94
95
|
- lib/rbbt/sources/CTCAE.rb
|
95
96
|
- lib/rbbt/sources/GTRD.rb
|
@@ -100,6 +101,7 @@ files:
|
|
100
101
|
- lib/rbbt/sources/PRO.rb
|
101
102
|
- lib/rbbt/sources/PSI_MI.rb
|
102
103
|
- lib/rbbt/sources/STITCH.rb
|
104
|
+
- lib/rbbt/sources/array_express.rb
|
103
105
|
- lib/rbbt/sources/barcode.rb
|
104
106
|
- lib/rbbt/sources/bibtex.rb
|
105
107
|
- lib/rbbt/sources/biomart.rb
|