rbbt-sources 3.1.16 → 3.1.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +1 -0
- data/lib/rbbt/sources/CASCADE.rb +177 -0
- data/lib/rbbt/sources/phospho_ELM.rb +59 -0
- data/lib/rbbt/sources/phospho_site_plus.rb +39 -0
- data/lib/rbbt/sources/reactome.rb +68 -8
- data/lib/rbbt/sources/signor.rb +77 -0
- data/share/Ensembl/release_dates +10 -0
- data/share/install/Organism/organism_helpers.rb +2 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf8a7403301523a87252057d4be1ac9e0f348708
|
4
|
+
data.tar.gz: '0939814d7b68cb713e05b5f1af2a572a8fffee75'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 779c0aa427ee0335a2533533179e05ef0c42c543316ea7cd9eb3c5b793f6603ea46cf63bffba71df9aed9cec0fc08df984255c1c3a83fe85c9bb5b51048395d1
|
7
|
+
data.tar.gz: 8a78a7bc1e4ba4819849c0375a1d6744e20bce89cdaf91e1c21cd26d4541f2ff91bd1d5aa9bed05e38b211051cc51774df2b0d2eb198bd4500d94c95196d48ac
|
@@ -0,0 +1,177 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module CASCADE
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/CASCADE'
|
7
|
+
|
8
|
+
#def self.organism(org="Hsa")
|
9
|
+
# Organism.default_code(org)
|
10
|
+
#end
|
11
|
+
|
12
|
+
#self.search_paths = {}
|
13
|
+
#self.search_paths[:default] = :lib
|
14
|
+
|
15
|
+
|
16
|
+
URL = 'https://bitbucket.org/asmundf/cascade'
|
17
|
+
CASCADE.claim CASCADE.interactions, :proc do
|
18
|
+
io = nil
|
19
|
+
TmpFile.with_file do |tmp|
|
20
|
+
Misc.in_dir tmp do
|
21
|
+
Log.warn "Please enter bitbucket credentials to access the asmundf/cascade repo"
|
22
|
+
`git clone #{URL}`
|
23
|
+
io = Open.open("cascade/cascade.tsv")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
tsv = TSV.open(io, :merge => true, :header_hash => '')
|
28
|
+
|
29
|
+
new_fields = ["ENTITYB"] + (tsv.fields - ["ENTITYB"])
|
30
|
+
tsv = tsv.reorder :key, new_fields
|
31
|
+
|
32
|
+
tsv.key_field = "ENTITYA (Associated Gene Name)"
|
33
|
+
tsv.rename_field "ENTITYB", "ENTITYB (Associated Gene Name)"
|
34
|
+
|
35
|
+
tsv.process "PMID" do |values|
|
36
|
+
values.collect{|v| v.scan(/\d+/) * ";;"}
|
37
|
+
end
|
38
|
+
|
39
|
+
tsv.to_s
|
40
|
+
end
|
41
|
+
|
42
|
+
CASCADE.claim CASCADE.members, :proc do
|
43
|
+
io = nil
|
44
|
+
TmpFile.with_file do |tmp|
|
45
|
+
Misc.in_dir tmp do
|
46
|
+
Log.warn "Please enter bitbucket credentials to access the asmundf/cascade repo"
|
47
|
+
`git clone #{URL}`
|
48
|
+
io = Open.open("cascade/cascade_translation.tsv")
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
tsv = TSV.open(io, :merge => true, :header_hash => '', :type => :flat, :sep2 => /[,.]\s*/)
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
CASCADE.claim CASCADE.paradigm, :proc do
|
57
|
+
|
58
|
+
tsv = CASCADE.interactions.tsv
|
59
|
+
members = CASCADE.members.tsv
|
60
|
+
|
61
|
+
proteins = Set.new members.values.flatten.uniq
|
62
|
+
outputs = Set.new
|
63
|
+
associations = {}
|
64
|
+
|
65
|
+
tsv.through do |source, values|
|
66
|
+
values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
|
67
|
+
next if typea == 'gene'
|
68
|
+
|
69
|
+
if typeb == 'gene'
|
70
|
+
target.sub!('_g','')
|
71
|
+
type = '-t'
|
72
|
+
elsif typeb == 'output' or typea == 'output'
|
73
|
+
type = '-ap'
|
74
|
+
else
|
75
|
+
type = '-a'
|
76
|
+
end
|
77
|
+
|
78
|
+
proteins << source unless source.include? '_f' or source.include? '_c'
|
79
|
+
proteins << target unless target.include? '_f' or target.include? '_c'
|
80
|
+
|
81
|
+
outputs << source if typea == 'output'
|
82
|
+
outputs << target if typeb == 'output'
|
83
|
+
|
84
|
+
effect_symbol = '>'
|
85
|
+
effect_symbol = '|' if effect.include? 'inhibit'
|
86
|
+
|
87
|
+
associations[[source,target]] = [type, effect_symbol]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
str = StringIO.new
|
92
|
+
|
93
|
+
proteins.each do |p|
|
94
|
+
next if outputs.include? p
|
95
|
+
str.puts ["protein", p] * "\t"
|
96
|
+
end
|
97
|
+
|
98
|
+
outputs.each do |o|
|
99
|
+
str.puts ["abstract", o] * "\t"
|
100
|
+
end
|
101
|
+
|
102
|
+
members.each do |e, targets|
|
103
|
+
e = e.dup
|
104
|
+
case
|
105
|
+
when e.include?('_c')
|
106
|
+
str.puts ["complex", e] * "\t"
|
107
|
+
type = 'component'
|
108
|
+
when e.include?('_f')
|
109
|
+
str.puts ["family", e] * "\t"
|
110
|
+
type = 'member'
|
111
|
+
else
|
112
|
+
next
|
113
|
+
end
|
114
|
+
|
115
|
+
targets.each do |target|
|
116
|
+
associations[[target,e]] = [type, '>']
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
associations.each do |p,i|
|
122
|
+
source, target = p
|
123
|
+
type, symbol = i
|
124
|
+
|
125
|
+
str.puts [source, target, [type,symbol]*""] * "\t"
|
126
|
+
end
|
127
|
+
|
128
|
+
str.rewind
|
129
|
+
str
|
130
|
+
end
|
131
|
+
|
132
|
+
CASCADE.claim CASCADE["topology.sif"], :proc do
|
133
|
+
|
134
|
+
tsv = CASCADE.interactions.tsv
|
135
|
+
|
136
|
+
str = StringIO.new
|
137
|
+
|
138
|
+
tsv.through do |source, values|
|
139
|
+
values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
|
140
|
+
|
141
|
+
effect_symbol = '->'
|
142
|
+
effect_symbol = '-|' if effect.include? 'inhibit'
|
143
|
+
|
144
|
+
str.puts [source, effect_symbol, target] * " "
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
str.rewind
|
149
|
+
str
|
150
|
+
end
|
151
|
+
|
152
|
+
CASCADE.claim CASCADE.output_nodes, :proc do
|
153
|
+
tsv = CASCADE.interactions.tsv
|
154
|
+
|
155
|
+
output = TSV.setup({}, :key_field => "Node", :fields => ["Sign"], :type => :single)
|
156
|
+
|
157
|
+
tsv.through do |source, values|
|
158
|
+
values.zip_fields.each do |target,typea,ida,databasea,typeb,idb,databaseb,effect|
|
159
|
+
case target
|
160
|
+
when "Antisurvival"
|
161
|
+
output[source] = -1
|
162
|
+
when "Prosurvival"
|
163
|
+
output[source] = 1
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
output.to_s
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
iif CASCADE.interactions.produce.find if __FILE__ == $0
|
173
|
+
iif CASCADE.members.produce.find if __FILE__ == $0
|
174
|
+
iif CASCADE.paradigm.produce.find if __FILE__ == $0
|
175
|
+
iif CASCADE["topology.sif"].produce.find if __FILE__ == $0
|
176
|
+
iif CASCADE.output_nodes.produce(true).find if __FILE__ == $0
|
177
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module PhosphoELM
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/PhosphoELM'
|
7
|
+
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
13
|
+
#self.search_paths = {}
|
14
|
+
#self.search_paths[:default] = :lib
|
15
|
+
|
16
|
+
PhosphoELM.claim PhosphoELM[".source/dump.tgz"], :proc do |file|
|
17
|
+
raise "Place phosphoELM_all_latest.dump.tgz from http://phospho.elm.eu.org at #{file}. Please consult license."
|
18
|
+
end
|
19
|
+
|
20
|
+
PhosphoELM.claim PhosphoELM.data, :proc do
|
21
|
+
tgz = PhosphoELM[".source/dump.tgz"].produce.find
|
22
|
+
|
23
|
+
organism = PhosphoELM.organism
|
24
|
+
uni2ensp = Organism.identifiers(organism).tsv :key_field => "UniProt/SwissProt Accession", :fields => ["Ensembl Protein ID"], :type => :flat, :persist => true
|
25
|
+
ensp2seq = Organism.protein_sequence(organism).tsv :persist => true
|
26
|
+
|
27
|
+
dumper = TSV::Dumper.new(:key_field => "Phosphosite", :fields => ["Kinases", "Source", "PMID"], :type => :list)
|
28
|
+
dumper.init
|
29
|
+
TmpFile.with_file do |dir|
|
30
|
+
Misc.in_dir dir do
|
31
|
+
CMD.cmd("tar xvfz #{tgz}")
|
32
|
+
f = Dir.glob("*.dump").first
|
33
|
+
TSV.traverse Open.open(f), :type => :array, :into => dumper do |line|
|
34
|
+
next unless line =~ /Homo sapiens/
|
35
|
+
acc, sequence, position, code, pmids, kinases, source, species, entry_date = line.split("\t")
|
36
|
+
ensps = uni2ensp[acc]
|
37
|
+
Log.warn "No Ensembl Protein ID for #{acc}" if ensps.nil?
|
38
|
+
next if ensps.nil?
|
39
|
+
sequence << "*"
|
40
|
+
good = ensps.select{|ensp| sequence == ensp2seq[ensp]}
|
41
|
+
Log.warn "No sequence match for #{acc} - #{ensps*", "}" if good.empty?
|
42
|
+
next if good.empty?
|
43
|
+
res = []
|
44
|
+
good.each do |ensp|
|
45
|
+
phospho_site = [ensp,":", code, position] * ""
|
46
|
+
res << [phospho_site, [kinases, source, pmids]]
|
47
|
+
end
|
48
|
+
res.extend MultipleResult
|
49
|
+
|
50
|
+
res
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
dumper.stream
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
iif PhosphoELM.data.produce(true).find if __FILE__ == $0
|
59
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module PhosphoSitePlues
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/PhosphoSitePlues'
|
7
|
+
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
13
|
+
#self.search_paths = {}
|
14
|
+
#self.search_paths[:default] = :lib
|
15
|
+
|
16
|
+
#self.search_paths = {}
|
17
|
+
#self.search_paths[:default] = :lib
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
# Names of the PhosphoSitePlus download files; each one is claimed below as a
# resource that the user must place by hand under .source.
# %w( ) yields an Array of names. The previous %( ) literal produced a single
# String, which has no #each, so the ALL_FILES.each loop could not run.
ALL_FILES = %w(
  Acetylation_site_dataset.gz Disease-associated_sites.gz
  Kinase_Substrate_Dataset.gz Methylation_site_dataset.gz
  O-GalNAc_site_dataset.gz O-GlcNAc_site_dataset.gz
  Phosphorylation_site_dataset.gz Phosphosite_PTM_seq.fasta.gz
  Phosphosite_seq.fasta.gz Regulatory_sites.gz Sumoylation_site_dataset.gz
  Ubiquitination_site_dataset.gz
)
|
27
|
+
|
28
|
+
ALL_FILES.each do |file|
|
29
|
+
PhosphoSitePlues.claim PhosphoSitePlues[".source"][file], :proc do |f|
|
30
|
+
raise "Place #{file} from http://www.phosphosite.org/ at #{f}. Please consult license."
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
PhosphoSitePlues.claim PhosphoSitePlues.kinase_substrate, :proc do
|
35
|
+
PhosphoSitePlues[".source/Kinase_Substrate_Dataset.gz"]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# When run as a script, produce the only derived resource this module claims
# (kinase_substrate); no `data` resource is claimed in this file.
iif PhosphoSitePlues.kinase_substrate.produce.find if __FILE__ == $0
|
@@ -1,27 +1,53 @@
|
|
1
|
-
require 'rbbt'
|
1
|
+
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
3
|
|
4
4
|
module Reactome
|
5
5
|
extend Resource
|
6
6
|
self.subdir = "share/databases/Reactome"
|
7
7
|
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
8
13
|
Reactome.claim Reactome.protein_pathways, :proc do
|
9
14
|
#url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
10
|
-
url = "http://
|
11
|
-
tsv = TSV.open(
|
12
|
-
tsv.key_field = "
|
15
|
+
url = "http://reactome.org/download/current/Ensembl2Reactome.txt"
|
16
|
+
tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
|
17
|
+
tsv.key_field = "Ensembl Gene ID"
|
18
|
+
tsv.fields = ["Reactome Pathway ID"]
|
19
|
+
tsv.namespace = Reactome.organism
|
20
|
+
tsv.to_s
|
21
|
+
end
|
22
|
+
|
23
|
+
Reactome.claim Reactome.protein_pathways_all, :proc do
|
24
|
+
#url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
25
|
+
url = "http://reactome.org/download/current/Ensembl2Reactome_All_Levels.txt"
|
26
|
+
tsv = TSV.open(url, :key_field => 0, :fields => [1], :merge => true, :type => :flat, :tsv_grep => "Homo sapiens")
|
27
|
+
tsv.key_field = "Ensembl Gene ID"
|
13
28
|
tsv.fields = ["Reactome Pathway ID"]
|
14
|
-
tsv.namespace =
|
29
|
+
tsv.namespace = Reactome.organism
|
15
30
|
tsv.to_s
|
16
31
|
end
|
17
32
|
|
33
|
+
|
18
34
|
Reactome.claim Reactome.pathway_names, :proc do
|
19
35
|
#url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
20
36
|
url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
|
21
|
-
tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [
|
37
|
+
tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [3], :type => :single)
|
22
38
|
tsv.key_field = "Reactome Pathway ID"
|
23
39
|
tsv.fields = ["Pathway Name"]
|
24
|
-
tsv.namespace =
|
40
|
+
tsv.namespace = Reactome.organism
|
41
|
+
tsv.to_s
|
42
|
+
end
|
43
|
+
|
44
|
+
Reactome.claim Reactome.pathway_pathway, :proc do
|
45
|
+
#url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
46
|
+
url = "http://reactome.org/download/current/ReactomePathwaysRelation.txt"
|
47
|
+
tsv = TSV.open(Open.open(url), :type => :flat, :merge => true)
|
48
|
+
tsv.key_field = "Reactome Pathway ID"
|
49
|
+
tsv.fields = ["Reactome Pathway ID"]
|
50
|
+
tsv.namespace = Reactome.organism
|
25
51
|
tsv.to_s
|
26
52
|
end
|
27
53
|
|
@@ -30,10 +56,39 @@ module Reactome
|
|
30
56
|
tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
|
31
57
|
tsv.key_field = "UniProt/SwissProt Accession"
|
32
58
|
tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
|
33
|
-
tsv.namespace =
|
59
|
+
tsv.namespace = Reactome.organism
|
34
60
|
tsv.to_s
|
35
61
|
end
|
36
62
|
|
63
|
+
#Reactome.claim Reactome.protein_pathways, :proc do
|
64
|
+
# #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
65
|
+
# url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
|
66
|
+
# tsv = TSV.open(Open.open(url), :key_field => 0, :fields => [1], :merge => true, :type => :double)
|
67
|
+
# tsv.key_field = "UniProt/SwissProt Accession"
|
68
|
+
# tsv.fields = ["Reactome Pathway ID"]
|
69
|
+
# tsv.namespace = Reactome.organism
|
70
|
+
# tsv.to_s
|
71
|
+
#end
|
72
|
+
|
73
|
+
#Reactome.claim Reactome.pathway_names, :proc do
|
74
|
+
# #url = "http://www.reactome.org/download/current/uniprot_2_pathways.stid.txt"
|
75
|
+
# url = "http://www.reactome.org/download/current/UniProt2Reactome.txt"
|
76
|
+
# tsv = TSV.open(Open.open(url), :key_field => 1, :fields => [2], :type => :single)
|
77
|
+
# tsv.key_field = "Reactome Pathway ID"
|
78
|
+
# tsv.fields = ["Pathway Name"]
|
79
|
+
# tsv.namespace = Reactome.organism
|
80
|
+
# tsv.to_s
|
81
|
+
#end
|
82
|
+
|
83
|
+
#Reactome.claim Reactome.protein_protein, :proc do
|
84
|
+
# url = "http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"
|
85
|
+
# tsv = TSV.open(CMD.cmd('cut -f 1,4,7,8,9|sed "s/UniProt://g;s/,/;/g"', :in => Open.open(url), :pipe => true), :type => :double, :merge => true)
|
86
|
+
# tsv.key_field = "UniProt/SwissProt Accession"
|
87
|
+
# tsv.fields = ["Interactor UniProt/SwissProt Accession", "Interaction type", "Reactions", "PMID"]
|
88
|
+
# tsv.namespace = Reactome.organism
|
89
|
+
# tsv.to_s
|
90
|
+
#end
|
91
|
+
|
37
92
|
end
|
38
93
|
|
39
94
|
if defined? Entity
|
@@ -82,3 +137,8 @@ if defined? Entity
|
|
82
137
|
end
|
83
138
|
end
|
84
139
|
end
|
140
|
+
|
141
|
+
Log.tsv Reactome.protein_pathways.produce.tsv if __FILE__ == $0
|
142
|
+
Log.tsv Reactome.protein_pathways_all.produce.tsv if __FILE__ == $0
|
143
|
+
Log.tsv Reactome.pathway_names.produce(true).tsv if __FILE__ == $0
|
144
|
+
Log.tsv Reactome.pathway_pathway.produce.tsv if __FILE__ == $0
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module Signor
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/Signor'
|
7
|
+
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
13
|
+
#self.search_paths = {}
|
14
|
+
#self.search_paths[:default] = :lib
|
15
|
+
|
16
|
+
Signor.claim Signor[".source/all.csv"], :proc do |file|
|
17
|
+
raise "Download all human data in CSV format from 'http://signor.uniroma2.it/downloads.php#all_download' and place in #{file}"
|
18
|
+
end
|
19
|
+
|
20
|
+
Signor.claim Signor.data, :proc do
|
21
|
+
#io = Misc.open_pipe do |sin|
|
22
|
+
# Signor[".source/all.csv"].open do |f|
|
23
|
+
# quoted = false
|
24
|
+
# while c = f.getc
|
25
|
+
# if c == '"'
|
26
|
+
# quoted = ! quoted
|
27
|
+
# end
|
28
|
+
# c = " " if c == "\n" and quoted
|
29
|
+
# sin << c
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#end
|
33
|
+
|
34
|
+
sio = Signor[".source/all.csv"].open
|
35
|
+
io_tmp = Misc.remove_quoted_new_line(sio)
|
36
|
+
io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')
|
37
|
+
|
38
|
+
tsv = TSV.open io, :header_hash => "", :sep => ";", :merge => true, :type => :double, :zipped => true, :monitor => true
|
39
|
+
tsv.each do |k,values|
|
40
|
+
# Strip the surrounding double quotes only from values quoted on BOTH ends
# (comparison, not assignment, on the last character), then restore the
# semicolons that were masked as --SEMICOLON-- before parsing.
clean_values = values.collect{|vs| vs.collect{|v| (v[0] == '"' && v[-1] == '"') ? v[1..-2] : v }.collect{|v| v.gsub("--SEMICOLON--", ';') } }
|
41
|
+
|
42
|
+
values.replace clean_values
|
43
|
+
end
|
44
|
+
tsv
|
45
|
+
end
|
46
|
+
|
47
|
+
Signor.claim Signor.protein_protein, :proc do
|
48
|
+
parser = TSV::Parser.new Signor.data
|
49
|
+
fields = parser.fields
|
50
|
+
dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (UniProt/SwissProt Accession)", "Effect", "Mechanism", "Residue"], :type => :double, :organism => Signor.organism
|
51
|
+
dumper.init
|
52
|
+
TSV.traverse parser, :into => dumper do |k,values|
|
53
|
+
info = {}
|
54
|
+
fields.zip(values).each do |field, value|
|
55
|
+
info[field] = value
|
56
|
+
end
|
57
|
+
next unless info["TYPEA"].first == "protein"
|
58
|
+
unia = info["IDA"].first
|
59
|
+
|
60
|
+
res = []
|
61
|
+
res.extend MultipleResult
|
62
|
+
|
63
|
+
info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["RESIDUE"]).each do |v|
|
64
|
+
typeb,idb,eff,mech,resi = v.flatten
|
65
|
+
next unless typeb == "protein"
|
66
|
+
res << [unia, [idb, eff, mech,resi]]
|
67
|
+
end
|
68
|
+
|
69
|
+
res
|
70
|
+
end
|
71
|
+
|
72
|
+
Misc.collapse_stream dumper.stream
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
iif Signor.protein_protein.produce(true).find if __FILE__ == $0
|
77
|
+
|
data/share/Ensembl/release_dates
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
#: :type=:single
|
2
2
|
#Release build
|
3
|
+
release-89 may2017
|
4
|
+
release-88 mar2017
|
5
|
+
release-87 dec2016
|
6
|
+
release-86 oct2016
|
7
|
+
release-85 jul2016
|
8
|
+
release-84 mar2016
|
9
|
+
release-83 dec2015
|
10
|
+
release-82 sep2015
|
11
|
+
release-81 jul2015
|
12
|
+
release-80 may2015
|
3
13
|
release-79 mar2015
|
4
14
|
release-78 dec2014
|
5
15
|
release-77 oct2014
|
@@ -672,6 +672,8 @@ file 'transcript_5utr' => ["exons", "transcript_exons", "transcripts"] do |t|
|
|
672
672
|
transcript = transcript2ensembl[transcript_id]
|
673
673
|
protein = transcript_protein[transcript]
|
674
674
|
|
675
|
+
next if transcript =~ /^LRG/
|
676
|
+
|
675
677
|
start_exon = exon2ensembl[start_exon]
|
676
678
|
eend_exon = exon2ensembl[eend_exon]
|
677
679
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -89,6 +89,7 @@ files:
|
|
89
89
|
- etc/allowed_biomart_archives
|
90
90
|
- etc/biomart/missing_in_archive
|
91
91
|
- etc/organisms
|
92
|
+
- lib/rbbt/sources/CASCADE.rb
|
92
93
|
- lib/rbbt/sources/COSTART.rb
|
93
94
|
- lib/rbbt/sources/CTCAE.rb
|
94
95
|
- lib/rbbt/sources/HPRD.rb
|
@@ -115,10 +116,13 @@ files:
|
|
115
116
|
- lib/rbbt/sources/organism.rb
|
116
117
|
- lib/rbbt/sources/pfam.rb
|
117
118
|
- lib/rbbt/sources/pharmagkb.rb
|
119
|
+
- lib/rbbt/sources/phospho_ELM.rb
|
120
|
+
- lib/rbbt/sources/phospho_site_plus.rb
|
118
121
|
- lib/rbbt/sources/pina.rb
|
119
122
|
- lib/rbbt/sources/polysearch.rb
|
120
123
|
- lib/rbbt/sources/pubmed.rb
|
121
124
|
- lib/rbbt/sources/reactome.rb
|
125
|
+
- lib/rbbt/sources/signor.rb
|
122
126
|
- lib/rbbt/sources/stitch.rb
|
123
127
|
- lib/rbbt/sources/string.rb
|
124
128
|
- lib/rbbt/sources/synapse.rb
|