rbbt-sources 3.1.16 → 3.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +1 -0
- data/lib/rbbt/sources/CASCADE.rb +177 -0
- data/lib/rbbt/sources/phospho_ELM.rb +59 -0
- data/lib/rbbt/sources/phospho_site_plus.rb +39 -0
- data/lib/rbbt/sources/reactome.rb +68 -8
- data/lib/rbbt/sources/signor.rb +77 -0
- data/share/Ensembl/release_dates +10 -0
- data/share/install/Organism/organism_helpers.rb +2 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf8a7403301523a87252057d4be1ac9e0f348708
|
4
|
+
data.tar.gz: '0939814d7b68cb713e05b5f1af2a572a8fffee75'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 779c0aa427ee0335a2533533179e05ef0c42c543316ea7cd9eb3c5b793f6603ea46cf63bffba71df9aed9cec0fc08df984255c1c3a83fe85c9bb5b51048395d1
|
7
|
+
data.tar.gz: 8a78a7bc1e4ba4819849c0375a1d6744e20bce89cdaf91e1c21cd26d4541f2ff91bd1d5aa9bed05e38b211051cc51774df2b0d2eb198bd4500d94c95196d48ac
|
@@ -0,0 +1,177 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Resource definitions for the CASCADE signalling network.
#
# The raw files live in an access-restricted Bitbucket repository that is
# cloned on demand; the claims below derive several files from it.
module CASCADE
  extend Resource
  self.subdir = 'share/databases/CASCADE'

  #def self.organism(org="Hsa")
  #  Organism.default_code(org)
  #end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # Repository the CASCADE files are cloned from.
  URL = 'https://bitbucket.org/asmundf/cascade'

  # Clones the CASCADE repository into a temporary directory and returns an
  # open stream for +file+, a path relative to the root of the checkout.
  #
  # Raises a RuntimeError when `git clone` does not exit successfully, so a
  # failed or refused clone is reported as such instead of as a missing file.
  def self.open_repo_file(file)
    io = nil
    TmpFile.with_file do |tmp|
      Misc.in_dir tmp do
        Log.warn "Please enter bitbucket credentials to access the asmundf/cascade repo"
        `git clone #{URL}`
        raise "Could not clone #{URL}" unless $? && $?.success?
        io = Open.open(File.join("cascade", file))
      end
    end
    io
  end

  # Interaction table keyed by ENTITYA, with ENTITYB moved to the first field
  # and the PMID field reduced to its numeric identifiers.
  CASCADE.claim CASCADE.interactions, :proc do
    io = CASCADE.open_repo_file("cascade.tsv")

    tsv = TSV.open(io, :merge => true, :header_hash => '')

    # Put ENTITYB first so that it is the first value of every entry
    new_fields = ["ENTITYB"] + (tsv.fields - ["ENTITYB"])
    tsv = tsv.reorder :key, new_fields

    tsv.key_field = "ENTITYA (Associated Gene Name)"
    tsv.rename_field "ENTITYB", "ENTITYB (Associated Gene Name)"

    # Keep only the digits of each PMID entry, joined by ';;'
    tsv.process "PMID" do |values|
      values.collect{|v| v.scan(/\d+/) * ";;"}
    end

    tsv.to_s
  end

  # Flat table read from cascade_translation.tsv; values are split on commas
  # or periods followed by optional whitespace.
  CASCADE.claim CASCADE.members, :proc do
    io = CASCADE.open_repo_file("cascade_translation.tsv")

    TSV.open(io, :merge => true, :header_hash => '', :type => :flat, :sep2 => /[,.]\s*/)
  end

  # Tab-separated listing built from the interactions and members tables:
  # first one line per entity ("protein", "abstract", "complex" or "family"
  # followed by its name), then one line per association
  # (source, target, type + effect symbol).
  CASCADE.claim CASCADE.paradigm, :proc do
    tsv = CASCADE.interactions.tsv
    members = CASCADE.members.tsv

    proteins = Set.new members.values.flatten.uniq
    outputs = Set.new
    associations = {}

    tsv.through do |source, values|
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        # Interactions whose source is a gene are not exported
        next if typea == 'gene'

        type = if typeb == 'gene'
                 # Gene targets are listed without their '_g' marker
                 target.sub!('_g','')
                 '-t'
               elsif typeb == 'output' || typea == 'output'
                 '-ap'
               else
                 '-a'
               end

        # Names containing '_f' or '_c' are listed from the members table
        # below as families and complexes, not as proteins
        proteins << source unless source.include?('_f') || source.include?('_c')
        proteins << target unless target.include?('_f') || target.include?('_c')

        outputs << source if typea == 'output'
        outputs << target if typeb == 'output'

        # to_s guards against entries that have no effect value
        effect_symbol = effect.to_s.include?('inhibit') ? '|' : '>'

        associations[[source,target]] = [type, effect_symbol]
      end
    end

    str = StringIO.new

    proteins.each do |p|
      next if outputs.include? p
      str.puts ["protein", p] * "\t"
    end

    outputs.each do |o|
      str.puts ["abstract", o] * "\t"
    end

    members.each do |e, targets|
      e = e.dup
      if e.include?('_c')
        str.puts ["complex", e] * "\t"
        type = 'component'
      elsif e.include?('_f')
        str.puts ["family", e] * "\t"
        type = 'member'
      else
        next
      end

      targets.each do |target|
        associations[[target,e]] = [type, '>']
      end
    end

    associations.each do |pair, info|
      source, target = pair
      type, symbol = info

      str.puts [source, target, [type,symbol]*""] * "\t"
    end

    str.rewind
    str
  end

  # Space-separated "source effect target" listing, one line per interaction;
  # the effect is '-|' when the effect text contains 'inhibit', '->' otherwise.
  CASCADE.claim CASCADE["topology.sif"], :proc do
    tsv = CASCADE.interactions.tsv

    str = StringIO.new

    tsv.through do |source, values|
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        # to_s guards against entries that have no effect value
        effect_symbol = effect.to_s.include?('inhibit') ? '-|' : '->'

        str.puts [source, effect_symbol, target] * " "
      end
    end

    str.rewind
    str
  end

  # Single-valued table mapping each node that targets "Antisurvival" or
  # "Prosurvival" to -1 or 1 respectively.
  CASCADE.claim CASCADE.output_nodes, :proc do
    tsv = CASCADE.interactions.tsv

    output = TSV.setup({}, :key_field => "Node", :fields => ["Sign"], :type => :single)

    tsv.through do |source, values|
      values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
        case target
        when "Antisurvival"
          output[source] = -1
        when "Prosurvival"
          output[source] = 1
        end
      end
    end

    output.to_s
  end
end
|
171
|
+
|
172
|
+
# When this file is executed directly, produce every CASCADE resource and
# print where each one was found.
if __FILE__ == $0
  iif CASCADE.interactions.produce.find
  iif CASCADE.members.produce.find
  iif CASCADE.paradigm.produce.find
  iif CASCADE["topology.sif"].produce.find
  iif CASCADE.output_nodes.produce(true).find
end
|
177
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Resource definitions for the phospho.ELM database. The source dump must be
# placed by hand (see the first claim); the data table is derived from it.
module PhosphoELM
  extend Resource
  self.subdir = 'share/databases/PhosphoELM'

  # Returns Organism.default_code for +org+ ("Hsa" unless specified).
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The dump cannot be fetched automatically; tell the user where to put it.
  PhosphoELM.claim PhosphoELM[".source/dump.tgz"], :proc do |file|
    raise "Place phosphoELM_all_latest.dump.tgz from http://phospho.elm.eu.org at #{file}. Please consult license."
  end

  # Builds a list-type table keyed by "Phosphosite" (Ensembl Protein ID, ':',
  # residue code and position) with fields Kinases, Source and PMID.
  #
  # Only lines matching "Homo sapiens" are used, and an entry is kept only for
  # the Ensembl proteins whose sequence equals the one in the dump.
  PhosphoELM.claim PhosphoELM.data, :proc do
    tgz = PhosphoELM[".source/dump.tgz"].produce.find

    organism = PhosphoELM.organism
    uni2ensp = Organism.identifiers(organism).tsv :key_field => "UniProt/SwissProt Accession", :fields => ["Ensembl Protein ID"], :type => :flat, :persist => true
    ensp2seq = Organism.protein_sequence(organism).tsv :persist => true

    dumper = TSV::Dumper.new(:key_field => "Phosphosite", :fields => ["Kinases", "Source", "PMID"], :type => :list)
    dumper.init
    TmpFile.with_file do |dir|
      Misc.in_dir dir do
        CMD.cmd("tar xvfz #{tgz}")
        f = Dir.glob("*.dump").first
        # Fail with a clear message instead of passing nil to Open.open
        raise "No .dump file found after extracting #{tgz}" if f.nil?

        TSV.traverse Open.open(f), :type => :array, :into => dumper do |line|
          next unless line =~ /Homo sapiens/
          acc, sequence, position, code, pmids, kinases, source, species, entry_date = line.split("\t")

          ensps = uni2ensp[acc]
          if ensps.nil?
            Log.warn "No Ensembl Protein ID for #{acc}"
            next
          end

          # NOTE(review): presumably the Ensembl sequences end in '*' - confirm
          sequence << "*"
          good = ensps.select{|ensp| sequence == ensp2seq[ensp]}
          if good.empty?
            Log.warn "No sequence match for #{acc} - #{ensps*", "}"
            next
          end

          # One entry per matching Ensembl protein
          res = good.collect do |ensp|
            phospho_site = [ensp,":", code, position] * ""
            [phospho_site, [kinases, source, pmids]]
          end
          res.extend MultipleResult

          res
        end
      end
    end
    dumper.stream
  end
end
|
57
|
+
|
58
|
+
# When this file is executed directly, produce the data table and print its location.
if __FILE__ == $0
  iif PhosphoELM.data.produce(true).find
end
|
59
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Resource definitions for PhosphoSitePlus. The source files must be placed by
# hand; every file in ALL_FILES is claimed with a message saying so.
module PhosphoSitePlues
  extend Resource
  self.subdir = 'share/databases/PhosphoSitePlues'

  # Returns Organism.default_code for +org+ ("Hsa" unless specified).
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # Names of the downloadable files. This must be a %w array literal: a plain
  # %( ) literal is a single String, which does not respond to #each.
  ALL_FILES = %w(Acetylation_site_dataset.gz Disease-associated_sites.gz
                 Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
                 O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
                 Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
                 Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
                 Ubiquitination_site_dataset.gz)

  # None of the files can be fetched automatically; tell the user where to put each one.
  ALL_FILES.each do |file|
    PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
      raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
    end
  end

  # NOTE(review): this block returns the path of the source file rather than
  # its content or a stream - confirm that this is what the claim expects.
  PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
    PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
  end
end
|
38
|
+
|
39
|
+
# When this file is executed directly, produce the kinase/substrate resource
# and print its location. No `data` resource is claimed by this module;
# `kinase_substrate` is the only derived one.
iif PhosphoSitePlues.kinase_substrate.produce.find if __FILE__ == $0
|
@@ -1,27 +1,53 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
3
|
|
4
4
|
module Reactome
|
5
5
|
extend Resource
|
6
6
|
self.subdir = "share/databases/Reactome"
|
7
7
|
|
8
|
+
# Returns Organism.default_code for +org+ ("Hsa" unless specified).
# The organism source is required here, at call time, rather than at load time.
def self.organism(org="Hsa")
  require 'rbbt/sources/organism'
  Organism.default_code org
end
|
12
|
+
|
8
13
|
# Flat table from "Ensembl Gene ID" to "Reactome Pathway ID", read from
# Ensembl2Reactome.txt and restricted to lines matching "Homo sapiens".
Reactome.claim Reactome.protein_pathways, :proc do
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
  source_url = "http://reactome.org/download/current/Ensembl2Reactome.txt"

  pathways = TSV.open(source_url,
                      :key_field => 0, :fields => [1],
                      :merge => true, :type => :flat,
                      :tsv_grep => "Homo sapiens")

  # The file is read by column position, so the names are set here
  pathways.key_field = "Ensembl Gene ID"
  pathways.fields = ["Reactome Pathway ID"]
  pathways.namespace = Reactome.organism

  pathways.to_s
end
|
22
|
+
|
23
|
+
# Same layout as protein_pathways, but read from
# Ensembl2Reactome_All_Levels.txt.
Reactome.claim Reactome.protein_pathways_all, :proc do
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
  source_url = "http://reactome.org/download/current/Ensembl2Reactome_All_Levels.txt"

  pathways = TSV.open(source_url,
                      :key_field => 0, :fields => [1],
                      :merge => true, :type => :flat,
                      :tsv_grep => "Homo sapiens")

  # The file is read by column position, so the names are set here
  pathways.key_field = "Ensembl Gene ID"
  pathways.fields = ["Reactome Pathway ID"]
  pathways.namespace = Reactome.organism

  pathways.to_s
end
|
17
32
|
|
33
|
+
|
18
34
|
# Single-valued table from "Reactome Pathway ID" (column 1 of
# UniProt2Reactome.txt) to "Pathway Name" (column 3).
Reactome.claim Reactome.pathway_names, :proc do
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
  source_url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"

  names = TSV.open(Open.open(source_url),
                   :key_field => 1, :fields => [3],
                   :type => :single)

  # The file is read by column position, so the names are set here
  names.key_field = "Reactome Pathway ID"
  names.fields = ["Pathway Name"]
  names.namespace = Reactome.organism

  names.to_s
end
|
43
|
+
|
44
|
+
# Flat, merged table read from ReactomePathwaysRelation.txt.
# NOTE(review): the key field and the value field are both named
# "Reactome Pathway ID" - confirm that the duplicate name is intended.
Reactome.claim Reactome.pathway_pathway, :proc do
  #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
  source_url = "http://reactome.org/download/current/ReactomePathwaysRelation.txt"

  relations = TSV.open(Open.open(source_url),
                       :type => :flat, :merge => true)

  relations.key_field = "Reactome Pathway ID"
  relations.fields = ["Reactome Pathway ID"]
  relations.namespace = Reactome.organism

  relations.to_s
end
|
27
53
|
|
@@ -30,10 +56,39 @@ module Reactome
|
|
30
56
|
tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
|
31
57
|
tsv.key_field = "UniProt/SwissProt Accession"
|
32
58
|
tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
|
33
|
-
tsv.namespace =
|
59
|
+
tsv.namespace = Reactome.organism
|
34
60
|
tsv.to_s
|
35
61
|
end
|
36
62
|
|
63
|
+
#Reactome.claim Reactome.protein_pathways, :proc do
|
64
|
+
# #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
65
|
+
# url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
|
66
|
+
# tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
|
67
|
+
# tsv.key_field = "UniProt/SwissProt Accession"
|
68
|
+
# tsv.fields = ["Reactome Pathway ID"]
|
69
|
+
# tsv.namespace = Reactome.organism
|
70
|
+
# tsv.to_s
|
71
|
+
#end
|
72
|
+
|
73
|
+
#Reactome.claim Reactome.pathway_names, :proc do
|
74
|
+
# #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
75
|
+
# url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
|
76
|
+
# tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
|
77
|
+
# tsv.key_field = "Reactome Pathway ID"
|
78
|
+
# tsv.fields = ["Pathway Name"]
|
79
|
+
# tsv.namespace = Reactome.organism
|
80
|
+
# tsv.to_s
|
81
|
+
#end
|
82
|
+
|
83
|
+
#Reactome.claim Reactome.protein_protein, :proc do
|
84
|
+
# url = "http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"
|
85
|
+
# tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
|
86
|
+
# tsv.key_field = "UniProt/SwissProt Accession"
|
87
|
+
# tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
|
88
|
+
# tsv.namespace = Reactome.organism
|
89
|
+
# tsv.to_s
|
90
|
+
#end
|
91
|
+
|
37
92
|
end
|
38
93
|
|
39
94
|
if defined? Entity
|
@@ -82,3 +137,8 @@ if defined? Entity
|
|
82
137
|
end
|
83
138
|
end
|
84
139
|
end
|
140
|
+
|
141
|
+
# When this file is executed directly, produce each pathway resource and log it as a table.
if __FILE__ == $0
  Log.tsv Reactome.protein_pathways.produce.tsv
  Log.tsv Reactome.protein_pathways_all.produce.tsv
  Log.tsv Reactome.pathway_names.produce(true).tsv
  Log.tsv Reactome.pathway_pathway.produce.tsv
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Resource definitions for the SIGNOR database. The CSV export must be placed
# by hand (see the first claim); the other resources are derived from it.
module Signor
  extend Resource
  self.subdir = 'share/databases/Signor'

  # Returns Organism.default_code for +org+ ("Hsa" unless specified).
  def self.organism(org="Hsa")
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The export cannot be fetched automatically; tell the user where to put it.
  Signor.claim Signor[".source/all.csv"], :proc do |file|
    raise "Download all human data in CSV format from 'http://signor.uniroma2.it/downloads.php#all_download' and place in #{file}"
  end

  # Parses the ';'-separated export into a double-type table, stripping the
  # quotes that surround individual values.
  Signor.claim Signor.data, :proc do
    # Earlier hand-written handling of new lines inside quoted values, kept for reference:
    #io = Misc.open_pipe do |sin|
    #  Signor[".source/all.csv"].open do |f|
    #    quoted = false
    #    while c = f.getc
    #      if c == '"'
    #        quoted = ! quoted
    #      end
    #      c = " " if c == "\n" and quoted
    #      sin << c
    #    end
    #  end
    #end

    sio = Signor[".source/all.csv"].open
    io_tmp = Misc.remove_quoted_new_line(sio)
    # Mask ';' inside quoted values so that it is not taken as a separator;
    # the placeholder is turned back into ';' below
    io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')

    tsv = TSV.open io, :header_hash => "", :sep => ";", :merge => true, :type => :double, :zipped => true, :monitor => true
    tsv.each do |k,values|
      clean_values = values.collect do |vs|
        vs.collect do |v|
          # Strip the surrounding quotes only when the value both starts and
          # ends with one; this must compare (==) the last character, an
          # assignment here would overwrite it
          unquoted = (v[0] == '"' && v[-1] == '"') ? v[1..-2] : v
          unquoted.gsub("--SEMICOLON--", ';')
        end
      end

      values.replace clean_values
    end
    tsv
  end

  # Protein-to-protein relations extracted from the data table: one entry per
  # target of type "protein" for every source whose TYPEA is "protein".
  Signor.claim Signor.protein_protein, :proc do
    parser = TSV::Parser.new Signor.data
    fields = parser.fields
    dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (UniProt/SwissProt Accession)", "Effect", "Mechanism", "Residue"], :type => :double, :organism => Signor.organism
    dumper.init
    TSV.traverse parser, :into => dumper do |k,values|
      # Index the values of this entry by field name
      info = {}
      fields.zip(values).each do |field, value|
        info[field] = value
      end
      next unless info["TYPEA"].first == "protein"
      unia = info["IDA"].first

      res = []
      res.extend MultipleResult

      info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["RESIDUE"]).each do |v|
        typeb,idb,eff,mech,resi = v.flatten
        next unless typeb == "protein"
        res << [unia, [idb, eff, mech,resi]]
      end

      res
    end

    Misc.collapse_stream dumper.stream
  end
end
|
75
|
+
|
76
|
+
# When this file is executed directly, produce the protein-protein table and print its location.
if __FILE__ == $0
  iif Signor.protein_protein.produce(true).find
end
|
77
|
+
|
data/share/Ensembl/release_dates
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
#: :type=:single
|
2
2
|
#Release build
|
3
|
+
release-89 may2017
|
4
|
+
release-88 mar2017
|
5
|
+
release-87 dec2016
|
6
|
+
release-86 oct2016
|
7
|
+
release-85 jul2016
|
8
|
+
release-84 mar2016
|
9
|
+
release-83 dec2015
|
10
|
+
release-82 sep2015
|
11
|
+
release-81 jul2015
|
12
|
+
release-80 may2015
|
3
13
|
release-79 mar2015
|
4
14
|
release-78 dec2014
|
5
15
|
release-77 oct2014
|
@@ -672,6 +672,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
|
|
672
672
|
transcript = transcript2ensembl[transcript_id]
|
673
673
|
protein = transcript_protein[transcript]
|
674
674
|
|
675
|
+
next if transcript =~ /^LRG/
|
676
|
+
|
675
677
|
start_exon = exon2ensembl[start_exon]
|
676
678
|
eend_exon = exon2ensembl[eend_exon]
|
677
679
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -89,6 +89,7 @@ files:
|
|
89
89
|
- etc/allowed_biomart_archives
|
90
90
|
- etc/biomart/missing_in_archive
|
91
91
|
- etc/organisms
|
92
|
+
- lib/rbbt/sources/CASCADE.rb
|
92
93
|
- lib/rbbt/sources/COSTART.rb
|
93
94
|
- lib/rbbt/sources/CTCAE.rb
|
94
95
|
- lib/rbbt/sources/HPRD.rb
|
@@ -115,10 +116,13 @@ files:
|
|
115
116
|
- lib/rbbt/sources/organism.rb
|
116
117
|
- lib/rbbt/sources/pfam.rb
|
117
118
|
- lib/rbbt/sources/pharmagkb.rb
|
119
|
+
- lib/rbbt/sources/phospho_ELM.rb
|
120
|
+
- lib/rbbt/sources/phospho_site_plus.rb
|
118
121
|
- lib/rbbt/sources/pina.rb
|
119
122
|
- lib/rbbt/sources/polysearch.rb
|
120
123
|
- lib/rbbt/sources/pubmed.rb
|
121
124
|
- lib/rbbt/sources/reactome.rb
|
125
|
+
- lib/rbbt/sources/signor.rb
|
122
126
|
- lib/rbbt/sources/stitch.rb
|
123
127
|
- lib/rbbt/sources/string.rb
|
124
128
|
- lib/rbbt/sources/synapse.rb
|