rbbt-sources 3.1.29 → 3.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14ec519ad1a7309351fadacf8d418b197c931769
4
- data.tar.gz: 8426ce938123e929a04a07bbb6c0ab5d12be6b04
3
+ metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
4
+ data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
5
5
  SHA512:
6
- metadata.gz: d817ada6d2df2a355dd3ac7ae6c98d3261f029bd9a82a02e45c0022118ff88b99e5ead63c6d424c6a522e0c4d89f66ac4b0c00c33d4007cd9462dbb8b3923ba6
7
- data.tar.gz: adf03a5ba5263bcb10877e8823826b9239cbabc73e9fe81ace8b2d21e2da0505d6a350f7c36f44efe73af0257863ec0077b6a7255abbb7daa3dfba857b6717bc
6
+ metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
7
+ data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
@@ -0,0 +1,183 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ # CITE: Genomic Determinants of Protein Abundance Variation in Colorectal
5
+ # Cancer Cells PMID: 28854368
6
+ #
7
+ # Roumeliotis TI, Williams SP, Gonçalves E, et al. Genomic Determinants of
8
+ # Protein Abundance Variation in Colorectal Cancer Cells. Cell Reports.
9
+ # 2017;20(9):2201-2214. doi:10.1016/j.celrep.2017.08.010.
10
+
11
+ module COREADPhosphoProteome
12
+ extend Resource
13
+ self.subdir = 'share/databases/COREADPhosphoProteome'
14
+
15
+ #def self.organism(org="Hsa")
16
+ # Organism.default_code(org)
17
+ #end
18
+
19
+ #self.search_paths = {}
20
+ #self.search_paths[:default] = :lib
21
+
22
+
23
+
24
# Supplementary table mmc3.xlsx of the cited paper, fetched from PMC on demand.
COREADPhosphoProteome.claim COREADPhosphoProteome[".source/mmc3.xlsx"], :url, "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5583477/bin/mmc3.xlsx"

# Raw table: the spreadsheet is produced (downloaded if missing), read with
# TSV.excel using :text => true, and the resulting stream is collapsed.
COREADPhosphoProteome.claim COREADPhosphoProteome.data, :proc do
  require 'rbbt/tsv/excel'
  spreadsheet = COREADPhosphoProteome[".source/mmc3.xlsx"].produce.find
  TSV.collapse_stream(TSV.excel(spreadsheet, :text => true))
end
31
+
32
# Phosphosite-by-cell-line table derived from `data`.
#
# Adds a "Phosphosite" field built as "<first column>:<third column>" for each
# zipped row of values, then reorders the table so that "Phosphosite" is the
# key and only the cell-line columns remain.
COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_levels, :proc do
  tsv = COREADPhosphoProteome.data.tsv

  # The first five fields are annotation columns (the original code unpacked
  # them as name, seq, site, kinases and a second name); everything after
  # them is treated as a cell line.
  cell_lines = tsv.fields.drop(5)

  tsv.add_field "Phosphosite" do |_key, values_list|
    # Positions 0 and 2 of each zipped row are the name and the site.
    Misc.zip_fields(values_list).collect { |values| values.values_at(0, 2) * ":" }
  end

  tsv.reorder "Phosphosite", cell_lines, :zipped => true
end
43
+
44
# Activity version of `phosphosite_levels`: the levels file is loaded as an
# RbbtMatrix, converted with to_activity(3) and returned as a TSV
# (tsv(false)). NOTE(review): the meaning of the 3 and of the false flag is
# defined by rbbt/matrix/barcode, not visible here -- confirm there.
COREADPhosphoProteome.claim COREADPhosphoProteome.phosphosite_binary, :proc do
  require 'rbbt/matrix'
  require 'rbbt/matrix/barcode'

  levels_file = COREADPhosphoProteome.phosphosite_levels.find
  RbbtMatrix.new(levels_file).to_activity(3).tsv(false)
end
52
+
53
# Signed presence/absence of SIGNOR-annotated phosphosites per cell line.
#
# Only phosphosites from `phosphosite_levels` that appear in
# Signor.phospho_sites are kept. For each cell-line value:
#   * non-empty value -> 1 if the site "Activates", -1 otherwise
#   * nil or ""       -> -1 if the site "Activates", 1 otherwise
# Sites whose SIGNOR annotations disagree ("Unclear") get nil in every column.
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_present, :proc do
  require 'rbbt/sources/signor'
  signor = Signor.phospho_sites.tsv

  # Collapse the list of effects of each site into one label: the effect
  # itself when unanimous, "Unclear" when two different effects are listed.
  signor.add_field "Fixed" do |_site, effects|
    case effects.uniq.length
    when 1 then effects.first
    when 2 then "Unclear"
    end
  end
  signor = signor.slice("Fixed").to_single

  parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
  dumper = TSV::Dumper.new parser.options
  dumper.init
  TSV.traverse parser, :into => dumper do |site, values|
    # Translate one-letter residue codes to the three-letter form used as
    # keys in the SIGNOR table.
    site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
    next unless signor.include? site

    # The effect is constant for the whole row, so look it up once.
    effect = signor[site]
    present, absent = effect == "Activates" ? [1, -1] : [-1, 1]

    new_values = values.flatten.collect do |value|
      next if effect == "Unclear"
      (value.nil? || value == "") ? absent : present
    end
    [site, new_values]
  end
end
87
+
88
# Signed activity of SIGNOR-annotated phosphosites using a fixed threshold.
#
# Only phosphosites from `phosphosite_levels` that appear in
# Signor.phospho_sites are kept. A value counts as "high" when it is
# non-empty and its to_f is >= 100:
#   * high value      -> 1 if the site "Activates", -1 otherwise
#   * anything else   -> -1 if the site "Activates", 1 otherwise
# Sites whose SIGNOR annotations disagree ("Unclear") get nil in every column.
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_100, :proc do
  require 'rbbt/sources/signor'
  signor = Signor.phospho_sites.tsv

  # Collapse the list of effects of each site into one label: the effect
  # itself when unanimous, "Unclear" when two different effects are listed.
  signor.add_field "Fixed" do |_site, effects|
    case effects.uniq.length
    when 1 then effects.first
    when 2 then "Unclear"
    end
  end
  signor = signor.slice("Fixed").to_single

  parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_levels
  dumper = TSV::Dumper.new parser.options
  dumper.init
  TSV.traverse parser, :into => dumper do |site, values|
    # Translate one-letter residue codes to the three-letter form used as
    # keys in the SIGNOR table.
    site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
    next unless signor.include? site

    # The effect is constant for the whole row, so look it up once.
    effect = signor[site]
    high, low = effect == "Activates" ? [1, -1] : [-1, 1]

    new_values = values.flatten.collect do |value|
      next if effect == "Unclear"
      above_threshold = !(value.nil? || value == "") && value.to_f >= 100
      above_threshold ? high : low
    end
    [site, new_values]
  end
end
125
+
126
# Signed activity of SIGNOR-annotated phosphosites from `phosphosite_binary`.
#
# Only phosphosites that appear in Signor.phospho_sites are kept. Within each
# row, a non-empty value equal to the row maximum is considered "on":
#   * value == row max -> 1 if the site "Activates", -1 otherwise
#   * anything else    -> -1 if the site "Activates", 1 otherwise
# Sites whose SIGNOR annotations disagree ("Unclear") get nil in every column.
COREADPhosphoProteome.claim COREADPhosphoProteome.signor_activity_levels, :proc do
  require 'rbbt/sources/signor'
  signor = Signor.phospho_sites.tsv

  # Collapse the list of effects of each site into one label: the effect
  # itself when unanimous, "Unclear" when two different effects are listed.
  signor.add_field "Fixed" do |_site, effects|
    case effects.uniq.length
    when 1 then effects.first
    when 2 then "Unclear"
    end
  end
  signor = signor.slice("Fixed").to_single

  parser = TSV::Parser.new COREADPhosphoProteome.phosphosite_binary
  dumper = TSV::Dumper.new parser.options
  dumper.init
  TSV.traverse parser, :into => dumper do |site, values|
    # The key may arrive wrapped in an Array; unwrap it before translating
    # one-letter residue codes to the three-letter form used by SIGNOR.
    site = site.first if Array === site
    site = site.sub(':S', ':Ser').sub(':T', ':Thr').sub(':Y', ':Tyr')
    next unless signor.include? site

    row = values.flatten
    max = row.max

    # The effect is constant for the whole row, so look it up once.
    effect = signor[site]
    on, off = effect == "Activates" ? [1, -1] : [-1, 1]

    new_values = row.collect do |value|
      next if effect == "Unclear"
      at_max = !(value.nil? || value == "") && value == max
      at_max ? on : off
    end
    [site, new_values]
  end
end
166
+
167
# Subset of `phosphosite_levels` restricted to phosphosites whose protein
# (the part of the key before the first ":") is listed in CASCADE.members.
COREADPhosphoProteome.claim COREADPhosphoProteome.cascade_levels, :proc do
  require 'rbbt/sources/CASCADE'
  require 'set'

  # A Set makes the per-row membership test constant time instead of a
  # linear scan over every CASCADE protein.
  cascade_proteins = Set.new(CASCADE.members.tsv.values.flatten.compact)

  tsv = COREADPhosphoProteome.phosphosite_levels.tsv
  tsv.select do |site, _values|
    cascade_proteins.include? site.split(":").first
  end
end
176
+ end
177
+
178
# When this file is run directly, produce each resource and print where it
# was found. signor_activity_present is produced with the `true` argument.
if __FILE__ == $0
  iif COREADPhosphoProteome.data.produce.find
  iif COREADPhosphoProteome.phosphosite_levels.produce.find
  iif COREADPhosphoProteome.phosphosite_binary.produce.find
  iif COREADPhosphoProteome.signor_activity_present.produce(true).find
  iif COREADPhosphoProteome.cascade_levels.produce.find
end
183
+
@@ -0,0 +1,17 @@
1
# Helpers to download expression data from the EBI Expression Atlas (gxa).
module ArrayExpress

  # Downloads the baseline RNA-Seq TPM table of an experiment as a TSV.
  #
  # code     - experiment accession interpolated into the download URL.
  # organism - value stored as the TSV namespace; defaults to
  #            Organism.default_code("Hsa").
  #
  # Lines starting with "#" are skipped, the second column of every line is
  # dropped, and the line containing "Gene ID" is prefixed with "#" so it is
  # read as the header. Values are cast with to_f and the key field is named
  # "Ensembl Gene ID".
  def self.tpm(code, organism = Organism.default_code("Hsa"))
    url = "https://www.ebi.ac.uk/gxa/experiments-content/#{code}/resources/ExperimentDownloadSupplier.RnaSeqBaseline/tpms.tsv"

    stream = TSV.traverse Open.open(url), :type => :line, :into => :stream do |line|
      next if line =~ /^#/

      columns = line.split("\t")
      # Keep the first column and everything from the third one onwards.
      row = "#{columns[0]}\t#{columns[2..-1] * "\t"}"
      row.include?("Gene ID") ? "#" + row : row
    end

    TSV.open(stream, :type => :list, :cast => :to_f).tap do |tsv|
      tsv.key_field = "Ensembl Gene ID"
      tsv.namespace = organism
    end
  end
end
@@ -1,9 +1,9 @@
1
1
  require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
3
 
4
- module PhosphoSitePlues
4
+ module PhosphoSitePlus
5
5
  extend Resource
6
- self.subdir = 'share/databases/PhosphoSitePlues'
6
+ self.subdir = 'share/databases/PhosphoSitePlus'
7
7
 
8
8
  def self.organism(org="Hsa")
9
9
  require 'rbbt/sources/organism'
@@ -18,7 +18,7 @@ module PhosphoSitePlues
18
18
 
19
19
 
20
20
 
21
- ALL_FILES = %(Acetylation_site_dataset.gz Disease-associated_sites.gz
21
+ ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
22
22
  Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
23
23
  O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
24
24
  Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
@@ -26,14 +26,14 @@ Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
26
26
  Ubiquitination_site_dataset.gz)
27
27
 
28
28
  ALL_FILES.each do |file|
29
- PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
29
+ PhosphoSitePlus.claim PhosphoSitePlus[".source"][file], :proc do |f|
30
30
  raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
31
31
  end
32
32
  end
33
33
 
34
- PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
35
- PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
34
+ PhosphoSitePlus.claim PhosphoSitePlus.kinase_substrate, :proc do
35
+ PhosphoSitePlus[".source/Kinase_Substrate_Dataset.gz"].produce
36
36
  end
37
37
  end
38
38
 
39
- iif PhosphoSitePlues.data.produce.find if __FILE__ == $0
39
+ iif PhosphoSitePlus.data.produce.find if __FILE__ == $0
@@ -228,26 +228,27 @@ module PubMed
228
228
  result = {}
229
229
  values = []
230
230
  chunks = Misc.divide(ids, (ids.length / 20) + 1)
231
- bar = Log::ProgressBar.new_bar(chunks.length, :desc => "Downloading articles from PubMed")
232
- chunks.each do |list|
233
- begin
234
- Misc.try3times do
235
- url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
236
-
237
- postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
238
- xml = TmpFile.with_file(postdata) do |postfile|
239
- #Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
240
- Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
231
+ Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
232
+ chunks.each do |list|
233
+ begin
234
+ Misc.try3times do
235
+ url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
236
+
237
+ postdata = "db=pubmed&retmode=xml&id=#{list* ","}"
238
+ xml = TmpFile.with_file(postdata) do |postfile|
239
+ #Open.read(url, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--post-file=" => postfile)
240
+ Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
241
+ end
242
+
243
+ values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
241
244
  end
242
-
243
- values += xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
245
+ rescue Aborted
246
+ raise $!
247
+ rescue Exception
248
+ Log.exception $!
249
+ ensure
250
+ bar.tick
244
251
  end
245
- rescue Aborted
246
- raise $!
247
- rescue Exception
248
- Log.exception $!
249
- ensure
250
- bar.tick
251
252
  end
252
253
  end
253
254
 
@@ -100,7 +100,42 @@ module Signor
100
100
 
101
101
  Misc.collapse_stream dumper.stream
102
102
  end
103
+
104
# Phosphosite -> effect table derived from Signor.protein_protein.
#
# Each (target, effect, mechanism, residue) entry whose mechanism is
# "phosphorylation" or "dephosphorylation" yields a key
# "<gene name>:<residue>" labelled "Activates" or "Deactivates". A site is
# activating when a phosphorylation up-regulates the target or a
# dephosphorylation does not; otherwise it is deactivating. Entries without a
# gene name for the target or without a residue are skipped.
Signor.claim Signor.phospho_sites, :proc do
  uni2name = UniProt.identifiers.Hsa.index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true

  dumper = TSV::Dumper.new :key_field => "Phosphosite", :fields => ["Effect"], :type => :flat, :organism => Signor.organism
  dumper.init

  TSV.traverse Signor.protein_protein, :into => dumper, :bar => true do |_source, values|
    entries = Misc.zip_fields(values).collect do |target, effect, mechanism, residue|
      next unless %w(phosphorylation dephosphorylation).include? mechanism
      kinase = mechanism == "phosphorylation"

      name = uni2name[target]
      next if name.nil? || residue.nil? || residue.empty?

      site = [name, residue] * ":"
      positive = effect.include? "up-regulates"

      # Activating when the direction of the modification and of the
      # regulation agree (both true or both false).
      label = kinase == positive ? "Activates" : "Deactivates"
      [site, label]
    end.compact

    # One input row may emit several output rows.
    entries.extend MultipleResult
  end

  TSV.collapse_stream(dumper)
end
103
137
  end
104
138
 
105
- iif Signor.tf_tg.produce(true).find if __FILE__ == $0
139
+ iif Signor.tf_tg.produce.find if __FILE__ == $0
140
+ iif Signor.phospho_sites.produce(true).find if __FILE__ == $0
106
141
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.29
4
+ version: 3.1.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-15 00:00:00.000000000 Z
11
+ date: 2018-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -90,6 +90,7 @@ files:
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
92
  - lib/rbbt/sources/CASCADE.rb
93
+ - lib/rbbt/sources/COREAD_phospho_proteome.rb
93
94
  - lib/rbbt/sources/COSTART.rb
94
95
  - lib/rbbt/sources/CTCAE.rb
95
96
  - lib/rbbt/sources/GTRD.rb
@@ -100,6 +101,7 @@ files:
100
101
  - lib/rbbt/sources/PRO.rb
101
102
  - lib/rbbt/sources/PSI_MI.rb
102
103
  - lib/rbbt/sources/STITCH.rb
104
+ - lib/rbbt/sources/array_express.rb
103
105
  - lib/rbbt/sources/barcode.rb
104
106
  - lib/rbbt/sources/bibtex.rb
105
107
  - lib/rbbt/sources/biomart.rb