rbbt-sources 3.1.29 → 3.1.30

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14ec519ad1a7309351fadacf8d418b197c931769
4
- data.tar.gz: 8426ce938123e929a04a07bbb6c0ab5d12be6b04
3
+ metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
4
+ data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
5
5
  SHA512:
6
- metadata.gz: d817ada6d2df2a355dd3ac7ae6c98d3261f029bd9a82a02e45c0022118ff88b99e5ead63c6d424c6a522e0c4d89f66ac4b0c00c33d4007cd9462dbb8b3923ba6
7
- data.tar.gz: adf03a5ba5263bcb10877e8823826b9239cbabc73e9fe81ace8b2d21e2da0505d6a350f7c36f44efe73af0257863ec0077b6a7255abbb7daa3dfba857b6717bc
6
+ metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
7
+ data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
@@ -0,0 +1,183 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ # CITE: Genomic Determinants of Protein Abundance Variation in Colorectal
5
+ # Cancer Cells PMID: 28854368
6
+ #
7
+ # Roumeliotis TI, Williams SP, Gonçalves E, et al. Genomic Determinants of
8
+ # Protein Abundance Variation in Colorectal Cancer Cells. Cell Reports.
9
+ # 2017;20(9):2201-2214. doi:10.1016/j.celrep.2017.08.010.
10
+
11
+ module COREADPhosphoProteome
12
+ extend Resource
13
+ self.subdir = 'share/databases/COREADPhosphoProteome'
14
+
15
+ #def self.organism(org="Hsa")
16
+ # Organism.default_code(org)
17
+ #end
18
+
19
+ #self.search_paths = {}
20
+ #self.search_paths[:default] = :lib
21
+
22
+
23
+
24
+ COREADPhosphoProteome.claim COREADPhosphoProteome[".source/mmc3.xlsx"], :url, "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5583477/bin/mmc3.xlsx"
25
+
26
+ COREADPhosphoProteome.claim COREADPhosphoProteome.data, :proc do
27
+ require 'rbbt/tsv/excel'
28
+ io = TSV.excel COREADPhosphoProteome[".source/mmc3.xlsx"].produce.find, :text => true
29
+ TSV.collapse_stream io
30
+ end
31
+
32
+ COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_levels, :proc do
33
+ tsv = COREADPhosphoProteome.data.tsv
34
+ name, seq, site, kinases, name, *cell_lines = tsv.fields
35
+ tsv.add_field "Phosphosite" do |uni,values_list|
36
+ Misc.zip_fields(values_list).collect{|values|
37
+ name, seq, site, kinases, kegg_name, *vals = values
38
+ [name, site] * ":"
39
+ }
40
+ end
41
+ tsv.reorder "Phosphosite", cell_lines, :zipped => true
42
+ end
43
+
44
+ COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_binary, :proc do
45
+ require 'rbbt/matrix'
46
+ require 'rbbt/matrix/barcode'
47
+
48
+ m = RbbtMatrix.new COREADPhosphoProteome.phosphosite_levels.find
49
+ a = m.to_activity(3).tsv(false)
50
+ a
51
+ end
52
+
53
+ COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_present, :proc do
54
+ require 'rbbt/sources/signor'
55
+ signor = Signor.phospho_sites.tsv
56
+
57
+ signor.add_field "Fixed" do |k,l|
58
+ case l.uniq.length
59
+ when 1
60
+ l.first
61
+ when 2
62
+ "Unclear"
63
+ end
64
+ end
65
+ signor = signor.slice("Fixed").to_single
66
+
67
+ parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
68
+ dumper = TSV::Dumper.new parser.options
69
+ dumper.init
70
+ cell_lines = parser.fields
71
+ TSV.traverse parser, :into => dumper do |site,values|
72
+ osite = site
73
+ site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
74
+ next unless signor.include? site
75
+ new_values = values.flatten.zip(cell_lines).collect{|value,cell_line|
76
+ next if signor[site] == "Unclear"
77
+ case value
78
+ when nil, ""
79
+ signor[site] == "Activates" ? -1 : 1
80
+ else
81
+ signor[site] == "Activates" ? 1 : -1
82
+ end
83
+ }
84
+ [site, new_values]
85
+ end
86
+ end
87
+
88
+ COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_100, :proc do
89
+ require 'rbbt/sources/signor'
90
+ signor = Signor.phospho_sites.tsv
91
+
92
+ signor.add_field "Fixed" do |k,l|
93
+ case l.uniq.length
94
+ when 1
95
+ l.first
96
+ when 2
97
+ "Unclear"
98
+ end
99
+ end
100
+ signor = signor.slice("Fixed").to_single
101
+
102
+ parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
103
+ dumper = TSV::Dumper.new parser.options
104
+ dumper.init
105
+ TSV.traverse parser, :into => dumper do |site,values|
106
+ osite = site
107
+ site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
108
+ next unless signor.include? site
109
+ new_values = values.flatten.collect{|value|
110
+ next if signor[site] == "Unclear"
111
+ case value
112
+ when nil, ""
113
+ signor[site] == "Activates" ? -1 : 1
114
+ else
115
+ if value.to_f >= 100
116
+ signor[site] == "Activates" ? 1 : -1
117
+ else
118
+ signor[site] == "Activates" ? -1 : 1
119
+ end
120
+ end
121
+ }
122
+ [site, new_values]
123
+ end
124
+ end
125
+
126
+ COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_levels, :proc do
127
+ require 'rbbt/sources/signor'
128
+ signor = Signor.phospho_sites.tsv
129
+
130
+ signor.add_field "Fixed" do |k,l|
131
+ case l.uniq.length
132
+ when 1
133
+ l.first
134
+ when 2
135
+ "Unclear"
136
+ end
137
+ end
138
+ signor = signor.slice("Fixed").to_single
139
+
140
+
141
+ parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_binary
142
+ dumper = TSV::Dumper.new parser.options
143
+ dumper.init
144
+ TSV.traverse parser, :into => dumper do |site,values|
145
+ osite = site
146
+ site = site.first if Array === site
147
+ site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
148
+ next unless signor.include? site
149
+ max = values.flatten.max
150
+ new_values = values.flatten.collect{|value|
151
+ next if signor[site] == "Unclear"
152
+ case value
153
+ when nil, ""
154
+ signor[site] == "Activates" ? -1 : 1
155
+ else
156
+ if value == max
157
+ signor[site] == "Activates" ? 1 : -1
158
+ else
159
+ signor[site] == "Activates" ? -1 : 1
160
+ end
161
+ end
162
+ }
163
+ [site, new_values]
164
+ end
165
+ end
166
+
167
+ COREADPhosphoProteome.claim COREADPhosphoProteome.cascade_levels, :proc do
168
+ require 'rbbt/sources/CASCADE'
169
+
170
+ cascade_proteins = CASCADE.members.tsv.values.flatten.compact.uniq
171
+ tsv = COREADPhosphoProteome.phosphosite_levels.tsv
172
+ tsv.select do |site,values|
173
+ cascade_proteins.include? site.split(":").first
174
+ end
175
+ end
176
+ end
177
+
178
+ iif COREADPhosphoProteome.data.produce.find if __FILE__ == $0
179
+ iif COREADPhosphoProteome.phosphosite_levels.produce.find if __FILE__ == $0
180
+ iif COREADPhosphoProteome.phosphosite_binary.produce.find if __FILE__ == $0
181
+ iif COREADPhosphoProteome.signor_activity_present.produce(true).find if __FILE__ == $0
182
+ iif COREADPhosphoProteome.cascade_levels.produce.find if __FILE__ == $0
183
+
@@ -0,0 +1,17 @@
1
+ module ArrayExpress
2
+
3
+ def self.tpm(code, organism = Organism.default_code("Hsa"))
4
+ url = "https://www.ebi.ac.uk/gxa/experiments-content/#{code}/resources/ExperimentDownloadSupplier.RnaSeqBaseline/tpms.tsv"
5
+ io = TSV.traverse Open.open(url), :type => :line, :into => :stream do |line|
6
+ next if line =~ /^#/
7
+ parts = line.split("\t")
8
+ line = parts[0] << "\t" << parts[2..-1] * "\t"
9
+ line = "#" + line if line =~ /Gene ID/
10
+ line
11
+ end
12
+ tsv = TSV.open(io, :type => :list, :cast => :to_f)
13
+ tsv.key_field = "Ensembl Gene ID"
14
+ tsv.namespace = organism
15
+ tsv
16
+ end
17
+ end
@@ -1,9 +1,9 @@
1
1
  require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
3
 
4
- module PhosphoSitePlues
4
+ module PhosphoSitePlus
5
5
  extend Resource
6
- self.subdir = 'share/databases/PhosphoSitePlues'
6
+ self.subdir = 'share/databases/PhosphoSitePlus'
7
7
 
8
8
  def self.organism(org="Hsa")
9
9
  require 'rbbt/sources/organism'
@@ -18,7 +18,7 @@ module PhosphoSitePlues
18
18
 
19
19
 
20
20
 
21
- ALL_FILES = %(Acetylation_site_dataset.gz Disease-associated_sites.gz
21
+ ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
22
22
  Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
23
23
  O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
24
24
  Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
@@ -26,14 +26,14 @@ Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
26
26
  Ubiquitination_site_dataset.gz)
27
27
 
28
28
  ALL_FILES.each do |file|
29
- PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
29
+ PhosphoSitePlus.claim PhosphoSitePlus[".source"][file], :proc do |f|
30
30
  raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
31
31
  end
32
32
  end
33
33
 
34
- PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
35
- PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
34
+ PhosphoSitePlus.claim PhosphoSitePlus.kinase_substrate, :proc do
35
+ PhosphoSitePlus[".source/Kinase_Substrate_Dataset.gz"].produce
36
36
  end
37
37
  end
38
38
 
39
- iif PhosphoSitePlues.data.produce.find if __FILE__ == $0
39
+ iif PhosphoSitePlus.data.produce.find if __FILE__ == $0
@@ -228,26 +228,27 @@ module PubMed
228
228
  result = {}
229
229
  values = []
230
230
  chunks = Misc.divide(ids, (ids.length / 20) + 1)
231
- bar = Log::ProgressBar.new_bar(chunks.length, :desc => "Downloading articles from PubMed")
232
- chunks.each do |list|
233
- begin
234
- Misc.try3times do
235
- url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
236
-
237
- postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
238
- xml = TmpFile.with_file(postdata) do |postfile|
239
- #Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
240
- Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
231
+ Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
232
+ chunks.each do |list|
233
+ begin
234
+ Misc.try3times do
235
+ url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
236
+
237
+ postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
238
+ xml = TmpFile.with_file(postdata) do |postfile|
239
+ #Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
240
+ Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
241
+ end
242
+
243
+ values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
241
244
  end
242
-
243
- values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
245
+ rescue Aborted
246
+ raise $!
247
+ rescue Exception
248
+ Log.exception $!
249
+ ensure
250
+ bar.tick
244
251
  end
245
- rescue Aborted
246
- raise $!
247
- rescue Exception
248
- Log.exception $!
249
- ensure
250
- bar.tick
251
252
  end
252
253
  end
253
254
 
@@ -100,7 +100,42 @@ module Signor
100
100
 
101
101
  Misc.collapse_stream dumper.stream
102
102
  end
103
+
104
+ Signor.claim Signor.phospho_sites, :proc do
105
+ uni2name = UniProt.identifiers.Hsa.index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
106
+
107
+ dumper = TSV::Dumper.new :key_field => "Phosphosite", :fields => ["Effect"], :type => :flat, :organism => Signor.organism
108
+ dumper.init
109
+
110
+ TSV.traverse Signor.protein_protein, :into => dumper, :bar => true do |source, values|
111
+ res = []
112
+ res.extend MultipleResult
113
+ Misc.zip_fields(values).each do |target, effect, mechanism, residue|
114
+ kinase = case mechanism
115
+ when "phosphorylation"
116
+ true
117
+ when "dephosphorylation"
118
+ false
119
+ else
120
+ next
121
+ end
122
+ name = uni2name[target]
123
+ next if name.nil?
124
+ next if residue.nil? or residue.empty?
125
+ site = [name, residue] * ":"
126
+ positive = effect.include? "up-regulates"
127
+
128
+ activates = kinase && positive || (!kinase && !positive)
129
+
130
+ res << [site, activates ? "Activates" : "Deactivates"]
131
+ end
132
+ res
133
+ end
134
+
135
+ TSV.collapse_stream(dumper)
136
+ end
103
137
  end
104
138
 
105
- iif Signor.tf_tg.produce(true).find if __FILE__ == $0
139
+ iif Signor.tf_tg.produce.find if __FILE__ == $0
140
+ iif Signor.phospho_sites.produce(true).find if __FILE__ == $0
106
141
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.29
4
+ version: 3.1.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-15 00:00:00.000000000 Z
11
+ date: 2018-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -90,6 +90,7 @@ files:
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
92
  - lib/rbbt/sources/CASCADE.rb
93
+ - lib/rbbt/sources/COREAD_phospho_proteome.rb
93
94
  - lib/rbbt/sources/COSTART.rb
94
95
  - lib/rbbt/sources/CTCAE.rb
95
96
  - lib/rbbt/sources/GTRD.rb
@@ -100,6 +101,7 @@ files:
100
101
  - lib/rbbt/sources/PRO.rb
101
102
  - lib/rbbt/sources/PSI_MI.rb
102
103
  - lib/rbbt/sources/STITCH.rb
104
+ - lib/rbbt/sources/array_express.rb
103
105
  - lib/rbbt/sources/barcode.rb
104
106
  - lib/rbbt/sources/bibtex.rb
105
107
  - lib/rbbt/sources/biomart.rb