rbbt-phgx 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/phgx.rb +2 -2
- data/lib/rbbt/mutation/mutation_assessor.rb +211 -0
- data/lib/rbbt/mutation/polyphen.rb +6 -6
- data/lib/rbbt/mutation/sift.rb +75 -19
- data/lib/rbbt/mutation/snps_and_go.rb +38 -10
- data/lib/rbbt/sources/cancer.rb +2 -2
- data/lib/rbbt/sources/kegg.rb +6 -2
- data/lib/rbbt/sources/matador.rb +5 -1
- data/lib/rbbt/sources/pharmagkb.rb +4 -1
- data/share/Cancer/anais_annotations +1 -1
- data/share/Cancer/anais_interactions +2 -1
- data/share/Cancer/cancer_genes.tsv +1 -1
- data/share/install/KEGG/Rakefile +6 -1
- data/share/install/PharmaGKB/Rakefile +4 -4
- data/share/install/lib/rake_helper.rb +2 -2
- data/test/rbbt/mutation/test_mutation_assessor.rb +36 -0
- data/test/rbbt/mutation/test_polyphen.rb +2 -2
- data/test/rbbt/mutation/test_sift.rb +5 -3
- metadata +11 -6
data/lib/phgx.rb
CHANGED
data/lib/rbbt/mutation/mutation_assessor.rb
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'rbbt/util/open'
|
2
|
+
require 'rbbt/tsv'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'digest/md5'
|
5
|
+
# Client for the Mutation Assessor web service at URL. It posts amino-acid
# substitutions, polls until the service reports it is done, and exposes the
# answer either as a TSV of raw predictions or as an extra
# "MutationAssessor:Prediction" column on an existing TSV.
module MutationAssessor

  # Raised internally while the response still shows variants as pending.
  class NotDone < StandardError; end

  URL="http://mutationassessor.org/"

  # First element of "*", in whatever form String#[] returns for an integer
  # index, so it compares directly against mutation[0] and mutation[-1].
  ASTERISK = "*"[0]

  # Submits substitutions to Mutation Assessor and returns its answer.
  #
  # mutations:: pairs of UniProt protein accession and either one amino-acid
  #             substitution or a list of them. A Hash or an Array of
  #             two-element arrays both work, since only #collect is used.
  #
  # Returns a :list TSV parsed from the last <textarea> of the response page,
  # or an empty TSV when nothing is left after the clean-up substitutions.
  #
  # Raises RuntimeError when the service cannot parse the variants, when the
  # response is still pending after 10 polls spaced 30 seconds apart, or when
  # no result text comes back. In the first and last case the submitted data
  # (and for the last one the page text) is saved to temporary files whose
  # paths are included in the message.
  def self.predict(mutations)
    vars = mutations.collect{|gene, list|
      list = [list] unless Array === list
      list.collect{|mut| [gene, mut] * "\t" }
    }.flatten.sort.uniq * "\n" + "\n"

    post_data = { :beenQ => "1",
                  :info=> "on",
                  :tableQ=> "on",
                  :chr=> "on",
                  :bsites=> "on",
                  :timeout => 600,
                  :vars => vars}.collect{|k,v| [k,v] * "="} * "&"

    Log.debug "Querying Mutation Assessor for: #{vars.split(/\n/).length}"
    tries = 0
    begin
      doc = nil
      # NOTE(review): with :nocache => false a retry may be served the same
      # cached page -- confirm Open.read's caching semantics.
      TmpFile.with_file(post_data) do |post_file|
        doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => false))
      end

      # A page without any textarea is handled like an empty result, so the
      # diagnostics below get written instead of failing on nil.
      last_area = doc.css('textarea').last
      result = last_area.nil? ? "" : last_area.content

      if result =~ /Cannot parse variant/
        tmp = TmpFile.tmp_file
        variants = tmp + ".list"
        Open.write(variants, post_data )
        raise "Cannot parse variants. Variants in file #{ variants }"
      end

      raise NotDone, "Not done" if result =~ /\t.sent\t./
    rescue NotDone
      Log.debug "Mutation Assessor not done, waiting:"
      Log.debug result

      sleep 30
      tries += 1
      retry if tries < 10
      raise "Error processing Mutation Assessor response"
    end

    if result.empty?
      tmp = TmpFile.tmp_file
      html = tmp + ".html"
      variants = tmp + ".list"
      # Write to the path that the error message reports.
      Open.write(html, doc.content)
      Open.write(variants, post_data )
      raise "Result empty. Possible error. html in #{ html }, variants in #{variants}"
    end

    # Drop the leading tab of the first line and the numeric first column of
    # every following line. No /s flag: in Ruby it selects an encoding, not
    # dot-all matching, and \s already matches newlines.
    result.sub!(/^\t/, '')
    result.gsub!(/\n\s*\d+\s*\t/, "\n")

    if result.empty?
      TSV.setup({}, :header_hash => "", :type => :list)
    else
      TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
    end
  end

  # Runs predict over slices of the input and merges the partial results.
  #
  # mutations:: same shape as for predict.
  #
  # The number of slices is the input length divided by 1000, rounded up
  # (presumably so each request carries at most about 1000 entries; this
  # depends on how Misc.divide splits the list -- TODO confirm).
  #
  # Returns the merged TSV, or a plain empty Hash for empty input.
  def self.chunked_predict(mutations)
    # Avoid asking Misc.divide for zero chunks.
    return {} if mutations.empty?

    chunks = (mutations.length.to_f / 1000).ceil
    Misc.divide(mutations.sort, chunks).inject(nil) do |acc, list|
      acc.nil? ? predict(list) : TSV.setup(acc.merge(predict(list)))
    end || {}
  end

  # Adds a "MutationAssessor:Prediction" field to tsv, in place.
  #
  # tsv:: a TSV that has "UniProt/SwissProt ID" (as key or field) and a
  #       "Protein Mutation" field. Both :double and flat layouts are handled.
  #
  # Mutations that are blank, mention "Indel", start and end with the same
  # character, or start or end with "*" are not sent to the service. The new
  # field holds the "Func. Impact" value reported for each accession/mutation
  # pair, "TOLERATED" when the mutation starts and ends with the same
  # character (presumably a synonymous change), and "No Prediction" when the
  # mutation or accession is blank or the service returned nothing for it.
  #
  # Returns tsv. Raises RuntimeError when tsv is not a TSV or lacks a
  # required field; errors raised by predict propagate.
  def self.add_predictions(tsv)
    raise "Input not TSV" unless TSV === tsv

    raise "Field 'UniProt/SwissProt ID' Not in TSV" unless tsv.all_fields.include? "UniProt/SwissProt ID"

    raise "Field 'Protein Mutation' Not in TSV" unless tsv.fields.include? "Protein Mutation"

    # Local lambdas rather than module methods, so they stay reachable from
    # the TSV iteration blocks whatever `self` those blocks run with.
    blank = lambda{|value| value.nil? || value.empty? }

    skip = lambda{|mutation|
      blank.call(mutation) ||
        mutation =~ /Indel/ ||
        mutation[0] == mutation[-1] ||
        mutation[-1] == ASTERISK ||
        mutation[0] == ASTERISK
    }

    data = []
    if tsv.type == :double
      tsv.through :key, ["UniProt/SwissProt ID", "Protein Mutation"] do |key,values|
        uni_accs, mutations = values
        mutations = (mutations || []).reject{|mutation| skip.call(mutation) }
        uni_accs = (uni_accs || []).reject{|uni_acc| blank.call(uni_acc) }.uniq
        next if uni_accs.empty? || mutations.empty?

        uni_accs.each{|uni_acc| data << [uni_acc, mutations] }
      end
    else
      tsv.through :key, ["UniProt/SwissProt ID", "Protein Mutation"] do |key,values|
        uni_acc, mutation = values
        next if blank.call(uni_acc) || skip.call(mutation)
        data << [uni_acc, mutation]
      end
    end

    data.sort!

    # predictions[protein][mutation] => "Func. Impact" value from the service.
    # Result keys are split on whitespace into protein and mutation.
    predictions = {}
    predict(data).each{|uni_acc, values|
      protein, mutation = uni_acc.split(/\s+/)

      values = values.zip_fields
      values.each do |v|
        pred = v["Func. Impact"]
        predictions[protein] ||= {}
        predictions[protein][mutation] = pred
      end
    }

    # Shared by both layouts; the order of the checks is significant.
    lookup = lambda{|uni_acc, mutation|
      case
      when blank.call(mutation)
        "No Prediction"
      when mutation[0] == mutation[-1]
        "TOLERATED"
      when blank.call(uni_acc)
        "No Prediction"
      else
        # NOTE(review): an accession missing from predictions yields
        # [].first (nil) rather than "No Prediction", and #first is called on
        # whatever v["Func. Impact"] returned -- both kept as in the original;
        # confirm the intended behaviour.
        list = []
        list = predictions[uni_acc][mutation] if predictions.include? uni_acc
        list.nil? ? "No Prediction" : list.first
      end
    }

    uni_acc_pos = tsv.identify_field "UniProt/SwissProt ID"
    protein_field = tsv.identify_field "Protein Mutation"

    if tsv.type == :double
      tsv.add_field "MutationAssessor:Prediction" do |key,values|
        uni_accs = uni_acc_pos === :key ? [key] : (values[uni_acc_pos] || [])

        next if uni_accs.reject{|v| blank.call(v) }.empty?

        mutations = values[protein_field] || []

        uni_accs.zip(mutations).collect{|uni_acc,mutation| lookup.call(uni_acc, mutation) }
      end
    else
      tsv.add_field "MutationAssessor:Prediction" do |key,values|
        uni_acc = uni_acc_pos === :key ? key : values[uni_acc_pos]

        next if blank.call(uni_acc)

        lookup.call(uni_acc, values[protein_field])
      end
    end

    tsv
  end

end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rbbt/util/open'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
3
|
require 'nokogiri'
|
4
4
|
require 'digest/md5'
|
5
5
|
|
@@ -37,8 +37,6 @@ module Polyphen2
|
|
37
37
|
desc = Digest::MD5.hexdigest(options.inspect)
|
38
38
|
options["description"] = desc
|
39
39
|
|
40
|
-
ddd desc
|
41
|
-
|
42
40
|
doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
|
43
41
|
|
44
42
|
sid = doc.css('input[name=sid]').attr('value')
|
@@ -67,12 +65,14 @@ module Polyphen2
|
|
67
65
|
break
|
68
66
|
end
|
69
67
|
|
70
|
-
sleep
|
68
|
+
sleep 3
|
71
69
|
end
|
72
70
|
|
73
71
|
return nil if view_link.nil?
|
74
72
|
|
75
|
-
tsv = TSV.
|
73
|
+
tsv = TSV.open Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :merge => true, :fix => Proc.new{|l| l.gsub(/ *\t */, "\t")}
|
74
|
+
tsv.fields = tsv.fields.collect{|f| f.strip}
|
75
|
+
tsv.key_field = tsv.key_field.strip
|
76
76
|
|
77
77
|
return tsv
|
78
78
|
end
|
@@ -150,7 +150,7 @@ module Polyphen2
|
|
150
150
|
break
|
151
151
|
end
|
152
152
|
|
153
|
-
sleep
|
153
|
+
sleep 3
|
154
154
|
end
|
155
155
|
|
156
156
|
return nil if view_link.nil?
|
data/lib/rbbt/mutation/sift.rb
CHANGED
@@ -3,6 +3,24 @@ require 'nokogiri'
|
|
3
3
|
module SIFT
|
4
4
|
URL_AMINOACID="http://sift.jcvi.org/sift-bin/SIFT_pid_subst_all_submit.pl"
|
5
5
|
URL_GENOMIC="http://sift.jcvi.org/sift-bin/SIFT_feed_to_chr_coords.pl"
|
6
|
+
URL_ENSP="http://sift.jcvi.org/sift-bin/retrieve_enst.pl"
|
7
|
+
|
8
|
+
# Posts protein/substitution entries to the SIFT endpoint at URL_ENSP and
# returns the cells of the HTML table in the response.
#
# mutations:: a collection whose elements are joined with "," (one entry per
#             line of the posted ENSP field), e.g. [["ENSP00000224605", "A63T"]].
#
# Returns the table rows as arrays of stripped cell texts, without the first
# row (presumably the table header). An Array input gets every row; any other
# input gets only the first one.
def self.predict(mutations)
  query = mutations.collect{|entry| entry * ","} * "\n"
  page = Open.read(URL_ENSP, :wget_options => {"--post-data" => "'ENSP=#{query}'"}, :nocache => false)
  doc = Nokogiri::HTML(page)

  rows = doc.css('tr').collect do |row|
    # The removed prefix is two UTF-8 non-breaking spaces followed by a space.
    row.css('td').collect{|cell| cell.content.strip.sub("\302\240\302\240 ", "") }
  end

  rows.shift

  return rows if Array === mutations
  rows.first
end
|
6
24
|
|
7
25
|
def self.predict_aminoacid_mutation(accession, mutations)
|
8
26
|
doc = Nokogiri::HTML(Open.read(URL_AMINOACID, :wget_options => {"--post-data" => "'GI=#{[accession, mutations].flatten * ","}&sequences_to_select=BEST&seq_identity_filter=90'"}, :nocache => false))
|
@@ -52,42 +70,79 @@ module SIFT
|
|
52
70
|
def self.add_predictions(tsv)
|
53
71
|
raise "Input not TSV" unless TSV === tsv
|
54
72
|
|
55
|
-
raise "Field '
|
73
|
+
raise "Field 'RefSeq Protein ID' Not in TSV" unless tsv.fields.include? "RefSeq Protein ID"
|
56
74
|
|
57
75
|
raise "Field 'Protein Mutation' Not in TSV" unless tsv.fields.include? "Protein Mutation"
|
58
76
|
|
59
77
|
data = []
|
60
|
-
tsv.
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
78
|
+
if tsv.type == :double
|
79
|
+
tsv.through :key, ["Refseq Protein ID", "Protein Mutation"] do |key,values|
|
80
|
+
refseqs, mutations = values
|
81
|
+
mutations = mutations.reject{|mutation| mutation[0] == mutation[-1]}
|
82
|
+
next if refseqs.nil? or refseqs.compact.reject{|v| v.nil? or v.empty?}.empty? or mutations.empty?
|
83
|
+
refseqs.compact.uniq.each do |refseq|
|
84
|
+
data << [refseq, mutations]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
else
|
88
|
+
tsv.through :key, ["Refseq Protein ID", "Protein Mutation"] do |key,values|
|
89
|
+
refseq, mutation = values
|
90
|
+
next if refseq.nil? or refseq.empty?
|
91
|
+
next if mutation[0] == mutation[-1]
|
92
|
+
data << [refseq, mutation]
|
67
93
|
end
|
68
94
|
end
|
69
95
|
|
70
96
|
data.sort!
|
71
97
|
|
72
|
-
|
73
98
|
predictions = {}
|
74
99
|
predict_aminoacid_mutation_batch(data).each{|values| predictions[values[0] + ":" << values[1]] = values.values_at 3,4,5,6}
|
75
100
|
|
76
|
-
refseq_field = tsv.identify_field "
|
101
|
+
refseq_field = tsv.identify_field "RefSeq Protein ID"
|
77
102
|
protein_field = tsv.identify_field "Protein Mutation"
|
78
103
|
|
79
|
-
tsv.
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
104
|
+
if tsv.type == :double
|
105
|
+
tsv.add_field "SIFT:Prediction" do |key,values|
|
106
|
+
refseqs = if refseq_field === :key
|
107
|
+
[key]
|
108
|
+
else
|
109
|
+
values[refseq_field] || []
|
110
|
+
end
|
85
111
|
|
86
|
-
|
112
|
+
next if refseqs.compact.reject{|v| v.nil? or v.empty?}.empty?
|
87
113
|
|
88
|
-
|
114
|
+
mutations = values[protein_field]
|
115
|
+
|
116
|
+
refseqs.zip(mutations).collect do |refseq,mutation|
|
117
|
+
case
|
118
|
+
when (mutation.nil? or mutation.empty?)
|
119
|
+
"No Prediction"
|
120
|
+
when mutation[0] == mutation[-1]
|
121
|
+
"TOLERATED"
|
122
|
+
when (refseq.nil? or refseq.empty?)
|
123
|
+
"No Prediction"
|
124
|
+
else
|
125
|
+
list = predictions[refseq + ":" << mutation]
|
126
|
+
if list.nil?
|
127
|
+
"No Prediction"
|
128
|
+
else
|
129
|
+
list.first
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
else
|
135
|
+
tsv.add_field "SIFT:Prediction" do |key,values|
|
136
|
+
refseq = if refseq_field === :key
|
137
|
+
key
|
138
|
+
else
|
139
|
+
values[refseq_field]
|
140
|
+
end
|
141
|
+
|
142
|
+
next if refseq.nil? or refseq.empty?
|
143
|
+
|
144
|
+
mutation = values[protein_field]
|
89
145
|
|
90
|
-
refseqs.zip(mutations).collect do |refseq,mutation|
|
91
146
|
case
|
92
147
|
when (mutation.nil? or mutation.empty?)
|
93
148
|
"No Prediction"
|
@@ -104,6 +159,7 @@ module SIFT
|
|
104
159
|
end
|
105
160
|
end
|
106
161
|
end
|
162
|
+
|
107
163
|
end
|
108
164
|
|
109
165
|
tsv
|
@@ -29,21 +29,49 @@ module SNPSandGO
|
|
29
29
|
raise "Field 'Protein Mutation' Not in TSV" if protein_field.nil?
|
30
30
|
|
31
31
|
|
32
|
-
tsv.
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
32
|
+
if tsv.type == :double
|
33
|
+
tsv.add_field "SNPs&GO:Prediction" do |key,values|
|
34
|
+
uniprots = if uniprot_field === :key
|
35
|
+
[key]
|
36
|
+
else
|
37
|
+
values[uniprot_field] || []
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
+
mutations = values[protein_field]
|
40
41
|
|
41
|
-
|
42
|
+
uniprots.zip(mutations).collect{|uniprot,mutation|
|
43
|
+
case
|
44
|
+
when mutation.nil?
|
45
|
+
"No Prediction"
|
46
|
+
when mutation[0] == mutation[-1]
|
47
|
+
"Neutral"
|
48
|
+
when (uniprot.nil? or uniprot.empty?)
|
49
|
+
"No Prediction"
|
50
|
+
else
|
51
|
+
begin
|
52
|
+
SNPSandGO.predict(uniprot, mutation).first
|
53
|
+
rescue
|
54
|
+
"No Prediction"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
}
|
58
|
+
end
|
59
|
+
else
|
60
|
+
tsv.add_field "SNPs&GO:Prediction" do |key,values|
|
61
|
+
uniprot = if uniprot_field === :key
|
62
|
+
key
|
63
|
+
else
|
64
|
+
values[uniprot_field]
|
65
|
+
end
|
66
|
+
|
67
|
+
next if uniprot.nil? or uniprot.empty?
|
68
|
+
|
69
|
+
mutation = values[protein_field]
|
42
70
|
case
|
43
71
|
when mutation.nil?
|
44
72
|
"No Prediction"
|
45
73
|
when mutation[0] == mutation[-1]
|
46
|
-
|
74
|
+
"Neutral"
|
47
75
|
when (uniprot.nil? or uniprot.empty?)
|
48
76
|
"No Prediction"
|
49
77
|
else
|
@@ -53,7 +81,7 @@ module SNPSandGO
|
|
53
81
|
"No Prediction"
|
54
82
|
end
|
55
83
|
end
|
56
|
-
|
84
|
+
end
|
57
85
|
end
|
58
86
|
|
59
87
|
tsv
|
data/lib/rbbt/sources/cancer.rb
CHANGED
data/lib/rbbt/sources/kegg.rb
CHANGED
data/lib/rbbt/sources/matador.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#Tumor Type Term Database Score Name
|
1
2
|
Bladder GO:0000902 GeneOntology 0.0614214465 cell morphogenesis
|
2
3
|
Bladder GO:0001503 GeneOntology 0.0734366145 ossification
|
3
4
|
Bladder GO:0001569 GeneOntology 0.0847152522 patterning of blood vessels
|
@@ -3399,4 +3400,4 @@ Sarcoma IPR000418 Interpro 0.0528577740 Ets
|
|
3399
3400
|
Sarcoma IPR001876 Interpro 0.0253276834 Zinc finger, RanBP2-type
|
3400
3401
|
Sarcoma IPR003655 Interpro 0.0528577740 KRAB-related
|
3401
3402
|
Sarcoma IPR011991 Interpro 0.0528577740 Winged helix repressor DNA-binding
|
3402
|
-
Sarcoma REACT_1046 Reactome 0.0320454129 Pyruvate metabolism and TCA cycle
|
3403
|
+
Sarcoma REACT_1046 Reactome 0.0320454129 Pyruvate metabolism and TCA cycle
|
@@ -1,4 +1,4 @@
|
|
1
|
-
Symbol Name GeneID Chr Chr Band Cancer Somatic Mut Cancer Germline Mut Tumour Types (Somatic Mutations) Tumour Types (Germline Mutations) Cancer Syndrome Tissue Type Cancer Molecular Genetics Mutation Type Translocation Partner Other Germline Mut Other Syndrome/Disease
|
1
|
+
#Symbol Name GeneID Chr Chr Band Cancer Somatic Mut Cancer Germline Mut Tumour Types (Somatic Mutations) Tumour Types (Germline Mutations) Cancer Syndrome Tissue Type Cancer Molecular Genetics Mutation Type Translocation Partner Other Germline Mut Other Syndrome/Disease
|
2
2
|
ABL1 v-abl Abelson murine leukemia viral oncogene homolog 1 25 9 9q34.1 yes CML, ALL, T-ALL L Dom T, Mis BCR, ETV6, NUP214
|
3
3
|
ABL2 v-abl Abelson murine leukemia viral oncogene homolog 2 27 1 1q24-q25 yes AML L Dom T ETV6
|
4
4
|
ACSL3 acyl-CoA synthetase long-chain family member 3 2181 2 2q36 yes prostate E Dom T ETV1
|
data/share/install/KEGG/Rakefile
CHANGED
@@ -78,6 +78,7 @@ end
|
|
78
78
|
file :pathways => 'source/pathways' do |t|
|
79
79
|
descs = {}
|
80
80
|
names = {}
|
81
|
+
klass = {}
|
81
82
|
pathway = nil
|
82
83
|
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
83
84
|
if line =~ /ENTRY\s+(\w+)/
|
@@ -91,9 +92,13 @@ file :pathways => 'source/pathways' do |t|
|
|
91
92
|
if line =~ /DESCRIPTION (.*)/
|
92
93
|
descs[pathway] = $1.strip
|
93
94
|
end
|
95
|
+
|
96
|
+
if line =~ /CLASS (.*)/
|
97
|
+
klass[pathway] = $1.strip
|
98
|
+
end
|
94
99
|
end
|
95
100
|
|
96
|
-
Open.write(t.name, ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway]] * "\t"} * "\n")
|
101
|
+
Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n")
|
97
102
|
end
|
98
103
|
|
99
104
|
process_tsv :gene_pathway, 'hsa_gene_map.tab',
|
@@ -30,7 +30,7 @@ end
|
|
30
30
|
|
31
31
|
process_tsv :gene_drug, 'relationships',
|
32
32
|
:select => proc{|l| l =~ /Gene:/ && l =~ /Drug:/},
|
33
|
-
:
|
33
|
+
:key_field => 'Entity1_id',
|
34
34
|
:fields => ['Entity2_id','Relationship'],
|
35
35
|
:header_hash => "",
|
36
36
|
:merge => true,
|
@@ -42,7 +42,7 @@ end
|
|
42
42
|
|
43
43
|
process_tsv :gene_disease, 'relationships',
|
44
44
|
:select => proc{|l| l =~ /Gene:/ && l =~ /Disease:/},
|
45
|
-
:
|
45
|
+
:key_field => 1,
|
46
46
|
:fields => 3,
|
47
47
|
:merge => true,
|
48
48
|
:header_hash => "",
|
@@ -53,7 +53,7 @@ process_tsv :gene_disease, 'relationships',
|
|
53
53
|
end
|
54
54
|
|
55
55
|
process_tsv :variants, 'variants',
|
56
|
-
:
|
56
|
+
:key_field => 1,
|
57
57
|
:fields => [3,7,8,9,10,4,6,5],
|
58
58
|
:header_hash => "",
|
59
59
|
:merge => true,
|
@@ -90,7 +90,7 @@ file :gene_pathway => 'source/pathways' do |t|
|
|
90
90
|
end
|
91
91
|
|
92
92
|
File.open(t.name, 'w') do |f|
|
93
|
-
f.puts "#" + ['Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
|
93
|
+
f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
|
94
94
|
pathways.each do |pathway, info|
|
95
95
|
next if info[:genes].nil?
|
96
96
|
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
|
@@ -1,7 +1,7 @@
|
|
1
1
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '../../../lib'))
|
2
2
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
3
|
|
4
|
-
require 'rbbt/
|
4
|
+
require 'rbbt/tsv'
|
5
5
|
require 'rbbt/util/open'
|
6
6
|
require 'rbbt/util/log'
|
7
7
|
|
@@ -43,7 +43,7 @@ def process_tsv(file, source, options = {}, &block)
|
|
43
43
|
file file => File.join(SOURCE_DIR, source) do |t|
|
44
44
|
block.call
|
45
45
|
|
46
|
-
d = TSV.
|
46
|
+
d = TSV.open(t.prerequisites.first, options)
|
47
47
|
|
48
48
|
if d.fields != nil
|
49
49
|
data_fields = d.fields.dup.unshift d.key_field
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/mutation/mutation_assessor'
|
3
|
+
|
4
|
+
# Tests for MutationAssessor. They call the live web service through the
# module, so they need network access.
class TestMutationAssessor < Test::Unit::TestCase

  # Disabled: the leading underscore keeps the method out of Test::Unit's
  # test_* discovery. Expects one result row for a single substitution.
  def _test_predict_aminoacid_mutation
    mutations = { "EGFR_HUMAN" => ["R521K"] }

    puts MutationAssessor.predict(mutations)
    assert_equal 1, MutationAssessor.predict(mutations).length
  end

  # Disabled like the test above. Expects the added prediction column of a
  # :double TSV to read "neutral" for EGFR_HUMAN R521K.
  def _test_predict_aminoacid_mutation_tsv
    options = {:key_field => "UniProt/SwissProt ID", :fields => ["Protein Mutation"], :type => :double}
    tsv = TSV.setup({"EGFR_HUMAN" => [["R521K"]]}, options)

    assert_equal "neutral", MutationAssessor.add_predictions(tsv).slice("MutationAssessor:Prediction").values.first.flatten.first
    assert_equal 1, MutationAssessor.add_predictions(tsv).slice(["MutationAssessor:Prediction", "Protein Mutation"]).length
  end

  # Smoke test: prints the chunked result for two proteins and makes no
  # assertion about its content.
  def test_predict_chunked
    mutations = {
      "EGFR_HUMAN" => ["R521K"],
      "P53_HUMAN" => ["R21K"]
    }

    puts MutationAssessor.chunked_predict(mutations)
  end

end
|
36
|
+
|
@@ -6,7 +6,7 @@ class TestPolyphen2 < Test::Unit::TestCase
|
|
6
6
|
accession = "A6NFZ4"
|
7
7
|
mutation = "Y34D"
|
8
8
|
|
9
|
-
|
9
|
+
assert_equal "probably damaging", Polyphen2.predict(accession, mutation).first
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_batch
|
@@ -14,7 +14,7 @@ class TestPolyphen2 < Test::Unit::TestCase
|
|
14
14
|
A6NFZ4 Y34D
|
15
15
|
EOF
|
16
16
|
|
17
|
-
|
17
|
+
assert_equal "probably damaging", Polyphen2::Batch.predict(query)["A6NFZ4_Y34D"]["prediction"].first
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
@@ -14,10 +14,12 @@ class TestSIFT < Test::Unit::TestCase
|
|
14
14
|
accession = "NP_001008502"
|
15
15
|
mutation = "Q554P"
|
16
16
|
|
17
|
-
ddd SIFT.predict_aminoacid_mutation_batch( [[accession, mutation]])
|
18
17
|
assert_equal "TOLERATED", SIFT.predict_aminoacid_mutation_batch( [[accession, mutation]]).first[3]
|
19
18
|
end
|
20
|
-
|
19
|
+
# SIFT.predict should report "TOLERATED" in the sixth cell of the first row
# for substitution A63T on ENSP00000224605 (queries the live service).
def test_predict_aminoacid_mutation_batch
  query = [["ENSP00000224605", "A63T"]]
  first_row = SIFT.predict(query).first
  assert_equal "TOLERATED", first_row[5]
end
|
21
24
|
|
22
25
|
end
|
23
|
-
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 3
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07
|
18
|
+
date: 2011-09-07 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,10 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 63
|
30
30
|
segments:
|
31
|
+
- 4
|
31
32
|
- 0
|
32
|
-
|
33
|
+
- 0
|
34
|
+
version: 4.0.0
|
33
35
|
type: :runtime
|
34
36
|
version_requirements: *id001
|
35
37
|
description: Pharmaco-genomics related data sources
|
@@ -44,6 +46,7 @@ files:
|
|
44
46
|
- LICENSE
|
45
47
|
- lib/phgx.rb
|
46
48
|
- lib/rbbt/mutation/fireDB.rb
|
49
|
+
- lib/rbbt/mutation/mutation_assessor.rb
|
47
50
|
- lib/rbbt/mutation/polyphen.rb
|
48
51
|
- lib/rbbt/mutation/sift.rb
|
49
52
|
- lib/rbbt/mutation/snps_and_go.rb
|
@@ -83,6 +86,7 @@ files:
|
|
83
86
|
- test/rbbt/mutation/test_fireDB.rb
|
84
87
|
- test/rbbt/mutation/test_sift.rb
|
85
88
|
- test/rbbt/mutation/test_polyphen.rb
|
89
|
+
- test/rbbt/mutation/test_mutation_assessor.rb
|
86
90
|
- test/test_helper.rb
|
87
91
|
has_rdoc: true
|
88
92
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
@@ -127,4 +131,5 @@ test_files:
|
|
127
131
|
- test/rbbt/mutation/test_fireDB.rb
|
128
132
|
- test/rbbt/mutation/test_sift.rb
|
129
133
|
- test/rbbt/mutation/test_polyphen.rb
|
134
|
+
- test/rbbt/mutation/test_mutation_assessor.rb
|
130
135
|
- test/test_helper.rb
|