rbbt-sources 3.1.29 → 3.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/COREAD_phospho_proteome.rb +183 -0
- data/lib/rbbt/sources/array_express.rb +17 -0
- data/lib/rbbt/sources/phospho_site_plus.rb +7 -7
- data/lib/rbbt/sources/pubmed.rb +19 -18
- data/lib/rbbt/sources/signor.rb +36 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
|
4
|
+
data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
|
7
|
+
data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# CITE: Genomic Determinants of Protein Abundance Variation in Colorectal
|
5
|
+
# Cancer Cells PMID: 28854368
|
6
|
+
#
|
7
|
+
# Roumeliotis TI, Williams SP, Gonçalves E, et al. Genomic Determinants of
|
8
|
+
# Protein Abundance Variation in Colorectal Cancer Cells. Cell Reports.
|
9
|
+
# 2017;20(9):2201-2214. doi:10.1016/j.celrep.2017.08.010.
|
10
|
+
|
11
|
+
# Phosphoproteomics of colorectal cancer (COREAD) cell lines, taken from the
# supplementary table mmc3.xlsx of Roumeliotis et al. 2017 (PMID: 28854368),
# together with resources derived from it: per-site levels, a binarized
# matrix, Signor-based activity calls and a CASCADE-restricted subset.
module COREADPhosphoProteome
  extend Resource
  self.subdir = 'share/databases/COREADPhosphoProteome'

  # Internal helper: load Signor.phospho_sites and reduce it to a
  # single-valued TSV (site => effect).
  #
  # A site whose effects are all the same keeps that effect; a site with two
  # distinct effects is labelled "Unclear"; any other case yields nil.
  def self.signor_effects_by_site
    require 'rbbt/sources/signor'
    signor = Signor.phospho_sites.tsv

    signor.add_field "Fixed" do |_site, effects|
      case effects.uniq.length
      when 1
        effects.first
      when 2
        "Unclear"
      end
    end

    signor.slice("Fixed").to_single
  end

  # Internal helper: rewrite a site key so that the one-letter residue codes
  # following the colon (S, T, Y) become three-letter codes (Ser, Thr, Tyr).
  #
  # site - a String key, or an Array whose first element is the key
  #
  # Returns the rewritten String.
  def self.signor_site_key(site)
    site = site.first if Array === site
    site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
  end

  # Internal helper: score one measurement of a site as 1 or -1.
  #
  # The site counts as "on" when value is neither nil nor "" and the given
  # block returns a truthy value for it. For an "Activates" effect an "on"
  # site scores 1 and an "off" site -1; for any other effect the scores are
  # swapped. An "Unclear" effect yields nil and the block is not called.
  def self.signor_activity_sign(effect, value)
    return nil if effect == "Unclear"

    on = !(value.nil? || value == "") && yield(value)
    sign = effect == "Activates" ? 1 : -1
    on ? sign : -sign
  end

  # Internal helper: traverse the TSV found at levels and dump, for every site
  # that is present in Signor, the values computed by the block.
  #
  # levels - resource to parse; the dumper reuses the parser options
  # block  - called with (effect, flattened values) for each retained site and
  #          expected to return the new values for that site
  #
  # Returns the result of TSV.traverse.
  def self.signor_activity_table(levels, &block)
    effects = signor_effects_by_site

    parser = TSV::Parser.new levels
    dumper = TSV::Dumper.new parser.options
    dumper.init
    TSV.traverse parser, :into => dumper do |site, values|
      site = COREADPhosphoProteome.signor_site_key(site)
      next unless effects.include? site
      [site, block.call(effects[site], values.flatten)]
    end
  end

  # Supplementary table downloaded from PubMed Central
  COREADPhosphoProteome.claim COREADPhosphoProteome[".source/mmc3.xlsx"], :url, "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5583477/bin/mmc3.xlsx"

  # The spreadsheet read as text into a TSV stream and collapsed
  COREADPhosphoProteome.claim COREADPhosphoProteome.data, :proc do
    require 'rbbt/tsv/excel'
    io = TSV.excel COREADPhosphoProteome[".source/mmc3.xlsx"].produce.find, :text => true
    TSV.collapse_stream io
  end

  # Table keyed by "Phosphosite" ("<first field>:<third field>" of each entry)
  # holding only the columns after the first five fields of the data table.
  # NOTE(review): those remaining columns are presumably one per cell line --
  # confirm against the spreadsheet.
  COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_levels, :proc do
    tsv = COREADPhosphoProteome.data.tsv
    cell_lines = tsv.fields.drop(5)
    tsv.add_field "Phosphosite" do |_key, values_list|
      Misc.zip_fields(values_list).collect do |values|
        name, _seq, site = values
        [name, site] * ":"
      end
    end
    tsv.reorder "Phosphosite", cell_lines, :zipped => true
  end

  # phosphosite_levels converted with RbbtMatrix#to_activity(3)
  COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_binary, :proc do
    require 'rbbt/matrix'
    require 'rbbt/matrix/barcode'

    matrix = RbbtMatrix.new COREADPhosphoProteome.phosphosite_levels.find
    matrix.to_activity(3).tsv(false)
  end

  # Activity calls where any non-empty level counts as phosphorylated
  COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_present, :proc do
    COREADPhosphoProteome.signor_activity_table(COREADPhosphoProteome.phosphosite_levels) do |effect, values|
      values.collect do |value|
        COREADPhosphoProteome.signor_activity_sign(effect, value) { true }
      end
    end
  end

  # Activity calls where a level of at least 100 counts as phosphorylated
  COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_100, :proc do
    COREADPhosphoProteome.signor_activity_table(COREADPhosphoProteome.phosphosite_levels) do |effect, values|
      values.collect do |value|
        COREADPhosphoProteome.signor_activity_sign(effect, value) { |level| level.to_f >= 100 }
      end
    end
  end

  # Activity calls over phosphosite_binary where only the values equal to the
  # maximum of the row count as phosphorylated
  COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_levels, :proc do
    COREADPhosphoProteome.signor_activity_table(COREADPhosphoProteome.phosphosite_binary) do |effect, values|
      max = values.max
      values.collect do |value|
        COREADPhosphoProteome.signor_activity_sign(effect, value) { |level| level == max }
      end
    end
  end

  # phosphosite_levels restricted to sites whose name (the text before the
  # first ":") appears among the values of CASCADE.members
  COREADPhosphoProteome.claim COREADPhosphoProteome.cascade_levels, :proc do
    require 'set'
    require 'rbbt/sources/CASCADE'

    # A Set makes the per-site membership test constant time
    cascade_proteins = CASCADE.members.tsv.values.flatten.compact.to_set
    tsv = COREADPhosphoProteome.phosphosite_levels.tsv
    tsv.select do |site, _values|
      cascade_proteins.include? site.split(":").first
    end
  end
end
|
177
|
+
|
178
|
+
# When this file is executed directly, produce each resource in turn and hand
# the located file to iif. signor_activity_present is produced with the
# argument true; the other resources use the default.
if __FILE__ == $0
  iif COREADPhosphoProteome.data.produce.find
  iif COREADPhosphoProteome.phosphosite_levels.produce.find
  iif COREADPhosphoProteome.phosphosite_binary.produce.find
  iif COREADPhosphoProteome.signor_activity_present.produce(true).find
  iif COREADPhosphoProteome.cascade_levels.produce.find
end
|
183
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Access to baseline RNA-Seq expression tables served from www.ebi.ac.uk/gxa.
module ArrayExpress

  # Download the TPM table of an experiment and load it as a TSV.
  #
  # Each downloaded line is split on tabs; the first column is kept as the
  # key, the second column is dropped and the remaining columns are kept as
  # values. Lines starting with "#" and blank lines are skipped, and the row
  # containing "Gene ID" is prefixed with "#" so it is read as the header.
  #
  # code     - experiment accession interpolated into the download URL
  # organism - organism code stored as the namespace of the resulting TSV
  #
  # Returns a TSV of type :list, with values cast using to_f, whose key field
  # is "Ensembl Gene ID".
  def self.tpm(code, organism = Organism.default_code("Hsa"))
    url = "https://www.ebi.ac.uk/gxa/experiments-content/#{code}/resources/ExperimentDownloadSupplier.RnaSeqBaseline/tpms.tsv"
    io = TSV.traverse Open.open(url), :type => :line, :into => :stream do |line|
      next if line =~ /^#/
      # A blank line has no columns to rearrange; skip it instead of failing on nil
      next if line.strip.empty?

      gene_id, _gene_name, *values = line.split("\t")
      row = gene_id + "\t" + values * "\t"
      row = "#" + row if row =~ /Gene ID/
      row
    end
    tsv = TSV.open(io, :type => :list, :cast => :to_f)
    tsv.key_field = "Ensembl Gene ID"
    tsv.namespace = organism
    tsv
  end
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
3
|
|
4
|
-
module
|
4
|
+
module PhosphoSitePlus
|
5
5
|
extend Resource
|
6
|
-
self.subdir = 'share/databases/
|
6
|
+
self.subdir = 'share/databases/PhosphoSitePlus'
|
7
7
|
|
8
8
|
def self.organism(org="Hsa")
|
9
9
|
require 'rbbt/sources/organism'
|
@@ -18,7 +18,7 @@ module PhosphoSitePlues
|
|
18
18
|
|
19
19
|
|
20
20
|
|
21
|
-
ALL_FILES = %(Acetylation_site_dataset.gz Disease-associated_sites.gz
|
21
|
+
ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
|
22
22
|
Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
|
23
23
|
O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
|
24
24
|
Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
|
@@ -26,14 +26,14 @@ Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
|
|
26
26
|
Ubiquitination_site_dataset.gz)
|
27
27
|
|
28
28
|
ALL_FILES.each do |file|
|
29
|
-
|
29
|
+
PhosphoSitePlus.claim PhosphoSitePlus[".source"][file], :proc do |f|
|
30
30
|
raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
34
|
+
PhosphoSitePlus.claim PhosphoSitePlus.kinase_substrate, :proc do
|
35
|
+
PhosphoSitePlus[".source/Kinase_Substrate_Dataset.gz"].produce
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
iif
|
39
|
+
iif PhosphoSitePlus.data.produce.find if __FILE__ == $0
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -228,26 +228,27 @@ module PubMed
|
|
228
228
|
result = {}
|
229
229
|
values = []
|
230
230
|
chunks = Misc.divide(ids, (ids.length / 20) + 1)
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
231
|
+
Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
|
232
|
+
chunks.each do |list|
|
233
|
+
begin
|
234
|
+
Misc.try3times do
|
235
|
+
url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
236
|
+
|
237
|
+
postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
|
238
|
+
xml = TmpFile.with_file(postdata) do |postfile|
|
239
|
+
#Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
|
240
|
+
Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
|
241
|
+
end
|
242
|
+
|
243
|
+
values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
|
241
244
|
end
|
242
|
-
|
243
|
-
|
245
|
+
rescue Aborted
|
246
|
+
raise $!
|
247
|
+
rescue Exception
|
248
|
+
Log.exception $!
|
249
|
+
ensure
|
250
|
+
bar.tick
|
244
251
|
end
|
245
|
-
rescue Aborted
|
246
|
-
raise $!
|
247
|
-
rescue Exception
|
248
|
-
Log.exception $!
|
249
|
-
ensure
|
250
|
-
bar.tick
|
251
252
|
end
|
252
253
|
end
|
253
254
|
|
data/lib/rbbt/sources/signor.rb
CHANGED
@@ -100,7 +100,42 @@ module Signor
|
|
100
100
|
|
101
101
|
Misc.collapse_stream dumper.stream
|
102
102
|
end
|
103
|
+
|
104
|
+
# Flat table keyed by "Phosphosite" ("<gene name>:<residue>") listing the
# "Effect" of each site, built from the entries of Signor.protein_protein
# whose mechanism is "phosphorylation" or "dephosphorylation".
#
# An entry is "Activates" when a phosphorylation up-regulates the target or a
# dephosphorylation does not, and "Deactivates" otherwise. Entries whose
# target has no associated gene name, or that have no residue, are skipped.
Signor.claim Signor.phospho_sites, :proc do
  uni2name = UniProt.identifiers.Hsa.index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true

  dumper = TSV::Dumper.new :key_field => "Phosphosite", :fields => ["Effect"], :type => :flat, :organism => Signor.organism
  dumper.init

  TSV.traverse Signor.protein_protein, :into => dumper, :bar => true do |_source, values|
    pairs = []
    pairs.extend MultipleResult

    Misc.zip_fields(values).each do |target, effect, mechanism, residue|
      # Only (de)phosphorylation events describe phosphosites
      next unless ["phosphorylation", "dephosphorylation"].include? mechanism
      kinase = mechanism == "phosphorylation"

      name = uni2name[target]
      next if name.nil?
      next if residue.nil? or residue.empty?

      positive = effect.include? "up-regulates"

      # Same direction for mechanism and effect means the site activates
      label = kinase == positive ? "Activates" : "Deactivates"
      pairs << [[name, residue] * ":", label]
    end

    pairs
  end

  TSV.collapse_stream(dumper)
end
|
103
137
|
end
|
104
138
|
|
105
|
-
iif Signor.tf_tg.produce
|
139
|
+
iif Signor.tf_tg.produce.find if __FILE__ == $0
|
140
|
+
iif Signor.phospho_sites.produce(true).find if __FILE__ == $0
|
106
141
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -90,6 +90,7 @@ files:
|
|
90
90
|
- etc/biomart/missing_in_archive
|
91
91
|
- etc/organisms
|
92
92
|
- lib/rbbt/sources/CASCADE.rb
|
93
|
+
- lib/rbbt/sources/COREAD_phospho_proteome.rb
|
93
94
|
- lib/rbbt/sources/COSTART.rb
|
94
95
|
- lib/rbbt/sources/CTCAE.rb
|
95
96
|
- lib/rbbt/sources/GTRD.rb
|
@@ -100,6 +101,7 @@ files:
|
|
100
101
|
- lib/rbbt/sources/PRO.rb
|
101
102
|
- lib/rbbt/sources/PSI_MI.rb
|
102
103
|
- lib/rbbt/sources/STITCH.rb
|
104
|
+
- lib/rbbt/sources/array_express.rb
|
103
105
|
- lib/rbbt/sources/barcode.rb
|
104
106
|
- lib/rbbt/sources/bibtex.rb
|
105
107
|
- lib/rbbt/sources/biomart.rb
|