rbbt-sources 3.1.16 → 3.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: feef731c1d37abc9fcaf618086634f4bebde968c
4
- data.tar.gz: 58d28f8048a043d406c0cc1f72fe3b44186ead5e
3
+ metadata.gz: bf8a7403301523a87252057d4be1ac9e0f348708
4
+ data.tar.gz: '0939814d7b68cb713e05b5f1af2a572a8fffee75'
5
5
  SHA512:
6
- metadata.gz: b5fa5a78fe7e152691a92b6119202fdb0066fcf7377bafeb907b02519477221762c8b563239c26024700ee48f301e4377527f95441d87cadb50a26bc884dc626
7
- data.tar.gz: cdb897373677da35b65aa93964af005ac1048a4097643a4ae0b1a67024f67dc90da5204b5eea928429039cc63c9da275c6faf3b99cc920f3e46ca893c8f3dcab
6
+ metadata.gz: 779c0aa427ee0335a2533533179e05ef0c42c543316ea7cd9eb3c5b793f6603ea46cf63bffba71df9aed9cec0fc08df984255c1c3a83fe85c9bb5b51048395d1
7
+ data.tar.gz: 8a78a7bc1e4ba4819849c0375a1d6744e20bce89cdaf91e1c21cd26d4541f2ff91bd1d5aa9bed05e38b211051cc51774df2b0d2eb198bd4500d94c95196d48ac
@@ -1,5 +1,6 @@
1
1
  may2009
2
2
  may2012
3
+ sep2013
3
4
  jan2013
4
5
  feb2014
5
6
  dec2013
@@ -0,0 +1,177 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the CASCADE database. The raw files are obtained by
# cloning the bitbucket repository at URL; every other resource in this module
# is derived from them.
module CASCADE
  extend Resource
  self.subdir = 'share/databases/CASCADE'

  #def self.organism(org="Hsa")
  # Organism.default_code(org)
  #end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  URL = 'https://bitbucket.org/asmundf/cascade'

  # Clones the CASCADE repository into a temporary location and returns an open
  # stream on +file+, a path relative to the root of the clone.
  #
  # The stream is opened while the clone still exists and is returned after the
  # TmpFile block has finished, exactly as the claims did when each carried its
  # own copy of this code.
  def self.clone_and_open(file)
    io = nil
    TmpFile.with_file do |tmp|
      Misc.in_dir tmp do
        Log.warn "Please enter bitbucket credentials to access the asmundf/cascade repo"
        `git clone #{URL}`
        io = Open.open(File.join("cascade", file))
      end
    end
    io
  end

  # Interaction table read from cascade.tsv: ENTITYB is moved to the first
  # field position, key and ENTITYB are renamed, and each PMID value is reduced
  # to its runs of digits joined with ";;".
  CASCADE.claim CASCADE.interactions, :proc do
    tsv = TSV.open(CASCADE.clone_and_open("cascade.tsv"), :merge => true, :header_hash => '')

    new_fields = ["ENTITYB"] + (tsv.fields - ["ENTITYB"])
    tsv = tsv.reorder :key, new_fields

    tsv.key_field = "ENTITYA (Associated Gene Name)"
    tsv.rename_field "ENTITYB", "ENTITYB (Associated Gene Name)"

    tsv.process "PMID" do |values|
      values.collect{|v| v.scan(/\d+/) * ";;"}
    end

    tsv.to_s
  end

  # Flat table read from cascade_translation.tsv; values are additionally
  # split on commas or periods followed by optional whitespace (:sep2).
  CASCADE.claim CASCADE.members, :proc do
    TSV.open(CASCADE.clone_and_open("cascade_translation.tsv"), :merge => true, :header_hash => '', :type => :flat, :sep2 => /[,.]\s*/)
  end

  # Tab-separated listing built from the interactions and members tables:
  # first one line per node ("protein", "abstract", "complex" or "family"),
  # then one line per association as source, target and type+symbol.
  CASCADE.claim CASCADE.paradigm, :proc do
    # Set is used below and nothing in this file requires it explicitly.
    require 'set'

    tsv = CASCADE.interactions.tsv
    members = CASCADE.members.tsv

    proteins = Set.new members.values.flatten.uniq
    outputs = Set.new
    associations = {}

    tsv.through do |source, values|
      # NOTE(review): the names below assume the field order of the
      # interactions table after ENTITYB was moved first -- confirm.
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        next if typea == 'gene'

        if typeb == 'gene'
          # Strips the first '_g' found in the target name (in place).
          target.sub!('_g','')
          type = '-t'
        elsif typeb == 'output' || typea == 'output'
          type = '-ap'
        else
          type = '-a'
        end

        # Names containing '_f' or '_c' are emitted later as families or
        # complexes, so they are not registered as proteins here.
        proteins << source unless source.include?('_f') || source.include?('_c')
        proteins << target unless target.include?('_f') || target.include?('_c')

        outputs << source if typea == 'output'
        outputs << target if typeb == 'output'

        effect_symbol = '>'
        effect_symbol = '|' if effect.include? 'inhibit'

        associations[[source,target]] = [type, effect_symbol]
      end
    end

    str = StringIO.new

    proteins.each do |p|
      next if outputs.include? p
      str.puts ["protein", p] * "\t"
    end

    outputs.each do |o|
      str.puts ["abstract", o] * "\t"
    end

    members.each do |e, targets|
      case
      when e.include?('_c')
        str.puts ["complex", e] * "\t"
        type = 'component'
      when e.include?('_f')
        str.puts ["family", e] * "\t"
        type = 'member'
      else
        next
      end

      targets.each do |target|
        associations[[target,e]] = [type, '>']
      end
    end

    associations.each do |(source, target), (type, symbol)|
      str.puts [source, target, [type,symbol]*""] * "\t"
    end

    str.rewind
    str
  end

  # Space-separated "source effect target" lines, one per interaction, where
  # the effect is '-|' when the effect text includes 'inhibit' and '->'
  # otherwise.
  CASCADE.claim CASCADE["topology.sif"], :proc do
    tsv = CASCADE.interactions.tsv

    str = StringIO.new

    tsv.through do |source, values|
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        effect_symbol = '->'
        effect_symbol = '-|' if effect.include? 'inhibit'

        str.puts [source, effect_symbol, target] * " "
      end
    end

    str.rewind
    str
  end

  # Single-value table from node name to sign: -1 for sources that target
  # "Antisurvival", 1 for sources that target "Prosurvival". A later matching
  # row for the same source overwrites an earlier one.
  CASCADE.claim CASCADE.output_nodes, :proc do
    tsv = CASCADE.interactions.tsv

    output = TSV.setup({}, :key_field => "Node", :fields => ["Sign"], :type => :single)

    tsv.through do |source, values|
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        case target
        when "Antisurvival"
          output[source] = -1
        when "Prosurvival"
          output[source] = 1
        end
      end
    end

    output.to_s
  end
end
171
+
172
# Only when this file is executed directly: produce each CASCADE resource in
# turn and pass the located file to `iif`.
if __FILE__ == $0
  iif CASCADE.interactions.produce.find
  iif CASCADE.members.produce.find
  iif CASCADE.paradigm.produce.find
  iif CASCADE["topology.sif"].produce.find
  iif CASCADE.output_nodes.produce(true).find
end
177
+
@@ -0,0 +1,59 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the Phospho.ELM database. The source dump has to be
# placed by hand; the derived `data` resource lists human phosphosites keyed by
# Ensembl protein, residue code and position.
module PhosphoELM
  extend Resource
  self.subdir = 'share/databases/PhosphoELM'

  # Organism code used for identifier and sequence lookups; resolved through
  # Organism.default_code, "Hsa" unless another code is given.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The dump cannot be fetched automatically: asking for it only tells the
  # user where to put the file.
  PhosphoELM.claim PhosphoELM[".source/dump.tgz"], :proc do |file|
    raise "Place phosphoELM_all_latest.dump.tgz from http://phospho.elm.eu.org at #{file}. Please consult license."
  end

  # Streams a list-type table "Phosphosite" => [Kinases, Source, PMID] built
  # from the lines of the extracted dump that mention Homo sapiens.
  PhosphoELM.claim PhosphoELM.data, :proc do
    tgz = PhosphoELM[".source/dump.tgz"].produce.find

    organism = PhosphoELM.organism
    uni2ensp = Organism.identifiers(organism).tsv :key_field => "UniProt/SwissProt Accession", :fields => ["Ensembl Protein ID"], :type => :flat, :persist => true
    ensp2seq = Organism.protein_sequence(organism).tsv :persist => true

    dumper = TSV::Dumper.new(:key_field => "Phosphosite", :fields => ["Kinases", "Source", "PMID"], :type => :list)
    dumper.init

    TmpFile.with_file do |dir|
      Misc.in_dir dir do
        CMD.cmd("tar xvfz #{tgz}")
        dump_file = Dir.glob("*.dump").first

        TSV.traverse Open.open(dump_file), :type => :array, :into => dumper do |line|
          next unless line =~ /Homo sapiens/

          acc, sequence, position, code, pmids, kinases, source, species, entry_date = line.split("\t")

          ensps = uni2ensp[acc]
          if ensps.nil?
            Log.warn "No Ensembl Protein ID for #{acc}"
            next
          end

          # NOTE(review): presumably the stored protein sequences end in "*",
          # hence the suffix before comparing -- confirm.
          sequence << "*"
          matching = ensps.select{|ensp| sequence == ensp2seq[ensp]}
          if matching.empty?
            Log.warn "No sequence match for #{acc} - #{ensps*", "}"
            next
          end

          # One output entry per Ensembl protein whose sequence matches.
          sites = matching.collect do |ensp|
            [[ensp,":", code, position] * "", [kinases, source, pmids]]
          end
          sites.extend MultipleResult

          sites
        end
      end
    end

    dumper.stream
  end
end
57
+
58
# Only when this file is executed directly: produce the data resource and pass
# the located file to `iif`.
if __FILE__ == $0
  iif PhosphoELM.data.produce(true).find
end
59
+
@@ -0,0 +1,39 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for PhosphoSitePlus downloads. Every source file has to
# be placed by hand; asking for one only tells the user where to put it.
module PhosphoSitePlues
  extend Resource
  self.subdir = 'share/databases/PhosphoSitePlues'

  # Organism code resolved through Organism.default_code, "Hsa" unless another
  # code is given.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # Names of the files expected under .source. This must be a %w word array:
  # a plain %( ) literal is a single String, which has no #each, so the loop
  # below would raise NoMethodError as soon as the file is loaded.
  ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
                 Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
                 O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
                 Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
                 Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
                 Ubiquitination_site_dataset.gz)

  ALL_FILES.each do |file|
    PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
      raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
    end
  end

  # NOTE(review): the block returns the source path itself rather than opening
  # or parsing it -- confirm this is what the resource producer expects.
  PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
    PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
  end
end
38
+
39
# Only when this file is executed directly: produce the kinase/substrate
# resource and pass the located file to `iif`. This used to ask for
# PhosphoSitePlues.data, which the module never claims; kinase_substrate is
# the only derived resource it defines.
if __FILE__ == $0
  iif PhosphoSitePlues.kinase_substrate.produce.find
end
@@ -1,27 +1,53 @@
1
- require 'rbbt'
1
+ require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
3
 
4
4
  module Reactome
5
5
  extend Resource
6
6
  self.subdir = "share/databases/Reactome"
7
7
 
8
+ def self.organism(org="Hsa")
9
+ require 'rbbt/sources/organism'
10
+ Organism.default_code(org)
11
+ end
12
+
8
13
  Reactome.claim Reactome.protein_pathways, :proc do
9
14
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
10
- url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
11
- tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
12
- tsv.key_field = "UniProt/SwissProt Accession"
15
+ url = "http://reactome.org/download/current/Ensembl2Reactome.txt"
16
+ tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
17
+ tsv.key_field = "Ensembl Gene ID"
18
+ tsv.fields = ["Reactome Pathway ID"]
19
+ tsv.namespace = Reactome.organism
20
+ tsv.to_s
21
+ end
22
+
23
+ Reactome.claim Reactome.protein_pathways_all, :proc do
24
+ #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
25
+ url = "http://reactome.org/download/current/Ensembl2Reactome_All_Levels.txt"
26
+ tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
27
+ tsv.key_field = "Ensembl Gene ID"
13
28
  tsv.fields = ["Reactome Pathway ID"]
14
- tsv.namespace = "Hsa"
29
+ tsv.namespace = Reactome.organism
15
30
  tsv.to_s
16
31
  end
17
32
 
33
+
18
34
  Reactome.claim Reactome.pathway_names, :proc do
19
35
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
20
36
  url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
21
- tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
37
+ tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [3], :type => :single)
22
38
  tsv.key_field = "Reactome Pathway ID"
23
39
  tsv.fields = ["Pathway Name"]
24
- tsv.namespace = "Hsa"
40
+ tsv.namespace = Reactome.organism
41
+ tsv.to_s
42
+ end
43
+
44
+ Reactome.claim Reactome.pathway_pathway, :proc do
45
+ #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
46
+ url = "http://reactome.org/download/current/ReactomePathwaysRelation.txt"
47
+ tsv = TSV.open(Open.open(url), :type => :flat, :merge => true)
48
+ tsv.key_field = "Reactome Pathway ID"
49
+ tsv.fields = ["Reactome Pathway ID"]
50
+ tsv.namespace = Reactome.organism
25
51
  tsv.to_s
26
52
  end
27
53
 
@@ -30,10 +56,39 @@ module Reactome
30
56
  tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
31
57
  tsv.key_field = "UniProt/SwissProt Accession"
32
58
  tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
33
- tsv.namespace = "Hsa"
59
+ tsv.namespace = Reactome.organism
34
60
  tsv.to_s
35
61
  end
36
62
 
63
+ #Reactome.claim Reactome.protein_pathways, :proc do
64
+ # #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
65
+ # url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
66
+ # tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
67
+ # tsv.key_field = "UniProt/SwissProt Accession"
68
+ # tsv.fields = ["Reactome Pathway ID"]
69
+ # tsv.namespace = Reactome.organism
70
+ # tsv.to_s
71
+ #end
72
+
73
+ #Reactome.claim Reactome.pathway_names, :proc do
74
+ # #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
75
+ # url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
76
+ # tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
77
+ # tsv.key_field = "Reactome Pathway ID"
78
+ # tsv.fields = ["Pathway Name"]
79
+ # tsv.namespace = Reactome.organism
80
+ # tsv.to_s
81
+ #end
82
+
83
+ #Reactome.claim Reactome.protein_protein, :proc do
84
+ # url = "http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"
85
+ # tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
86
+ # tsv.key_field = "UniProt/SwissProt Accession"
87
+ # tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
88
+ # tsv.namespace = Reactome.organism
89
+ # tsv.to_s
90
+ #end
91
+
37
92
  end
38
93
 
39
94
  if defined? Entity
@@ -82,3 +137,8 @@ if defined? Entity
82
137
  end
83
138
  end
84
139
  end
140
+
141
# Only when this file is executed directly: produce each Reactome table and
# hand it to Log.tsv.
if __FILE__ == $0
  Log.tsv Reactome.protein_pathways.produce.tsv
  Log.tsv Reactome.protein_pathways_all.produce.tsv
  Log.tsv Reactome.pathway_names.produce(true).tsv
  Log.tsv Reactome.pathway_pathway.produce.tsv
end
@@ -0,0 +1,77 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
# Resource definitions for the SIGNOR database. The CSV export has to be
# placed by hand; `data` is the cleaned table and `protein_protein` keeps only
# the protein-to-protein relations.
module Signor
  extend Resource
  self.subdir = 'share/databases/Signor'

  # Organism code resolved through Organism.default_code, "Hsa" unless another
  # code is given.
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The export cannot be fetched automatically: asking for it only tells the
  # user where to put the file.
  Signor.claim Signor[".source/all.csv"], :proc do |file|
    raise "Download all human data in CSV format from 'http://signor.uniroma2.it/downloads.php#all_download' and place in #{file}"
  end

  # Parses the ';'-separated export into a double-type table and cleans every
  # value: surrounding double quotes are removed and the semicolon placeholder
  # is turned back into ';'.
  Signor.claim Signor.data, :proc do
    sio = Signor[".source/all.csv"].open
    io_tmp = Misc.remove_quoted_new_line(sio)
    # Semicolons inside quotes are swapped for a placeholder so they are not
    # taken as field separators; the placeholder is undone below.
    io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')

    tsv = TSV.open io, :header_hash => "", :sep => ";", :merge => true, :type => :double, :zipped => true, :monitor => true
    tsv.each do |k,values|
      clean_values = values.collect do |vs|
        vs.collect do |v|
          # Both ends must be compared. The previous `v[-1] = '"'` was an
          # assignment: always true, and it overwrote the last character of
          # any value that merely started with a quote.
          v = v[1..-2] if v[0] == '"' && v[-1] == '"'
          v.gsub("--SEMICOLON--", ';')
        end
      end

      values.replace clean_values
    end
    tsv
  end

  # Streams source => [target, effect, mechanism, residue] for every relation
  # in `data` whose TYPEA and TYPEB are both "protein".
  Signor.claim Signor.protein_protein, :proc do
    parser = TSV::Parser.new Signor.data
    fields = parser.fields
    dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (UniProt/SwissProt Accession)", "Effect", "Mechanism", "Residue"], :type => :double, :organism => Signor.organism
    dumper.init
    TSV.traverse parser, :into => dumper do |k,values|
      # Field name => list of values for this entry.
      info = Hash[fields.zip(values)]

      next unless info["TYPEA"].first == "protein"
      unia = info["IDA"].first

      res = []
      res.extend MultipleResult

      info["TYPEB"].zip(info["IDB"], info["EFFECT"], info["MECHANISM"], info["RESIDUE"]).each do |typeb,idb,eff,mech,resi|
        next unless typeb == "protein"
        res << [unia, [idb, eff, mech,resi]]
      end

      res
    end

    Misc.collapse_stream dumper.stream
  end
end
75
+
76
# Only when this file is executed directly: produce the protein/protein
# resource and pass the located file to `iif`.
if __FILE__ == $0
  iif Signor.protein_protein.produce(true).find
end
77
+
@@ -1,5 +1,15 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-89 may2017
4
+ release-88 mar2017
5
+ release-87 dec2016
6
+ release-86 oct2016
7
+ release-85 jul2016
8
+ release-84 mar2016
9
+ release-83 dec2015
10
+ release-82 sep2015
11
+ release-81 jul2015
12
+ release-80 may2015
3
13
  release-79 mar2015
4
14
  release-78 dec2014
5
15
  release-77 oct2014
@@ -672,6 +672,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
672
672
  transcript = transcript2ensembl[transcript_id]
673
673
  protein = transcript_protein[transcript]
674
674
 
675
+ next if transcript =~ /^LRG/
676
+
675
677
  start_exon = exon2ensembl[start_exon]
676
678
  eend_exon = exon2ensembl[eend_exon]
677
679
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.16
4
+ version: 3.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-15 00:00:00.000000000 Z
11
+ date: 2017-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -89,6 +89,7 @@ files:
89
89
  - etc/allowed_biomart_archives
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
+ - lib/rbbt/sources/CASCADE.rb
92
93
  - lib/rbbt/sources/COSTART.rb
93
94
  - lib/rbbt/sources/CTCAE.rb
94
95
  - lib/rbbt/sources/HPRD.rb
@@ -115,10 +116,13 @@ files:
115
116
  - lib/rbbt/sources/organism.rb
116
117
  - lib/rbbt/sources/pfam.rb
117
118
  - lib/rbbt/sources/pharmagkb.rb
119
+ - lib/rbbt/sources/phospho_ELM.rb
120
+ - lib/rbbt/sources/phospho_site_plus.rb
118
121
  - lib/rbbt/sources/pina.rb
119
122
  - lib/rbbt/sources/polysearch.rb
120
123
  - lib/rbbt/sources/pubmed.rb
121
124
  - lib/rbbt/sources/reactome.rb
125
+ - lib/rbbt/sources/signor.rb
122
126
  - lib/rbbt/sources/stitch.rb
123
127
  - lib/rbbt/sources/string.rb
124
128
  - lib/rbbt/sources/synapse.rb