rbbt-phgx 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/mutation/mutation_assessor.rb +32 -24
- data/lib/rbbt/mutation/oncodriveFM.rb +146 -0
- data/lib/rbbt/mutation/polyphen.rb +59 -30
- data/lib/rbbt/mutation/sift.rb +16 -4
- data/lib/rbbt/mutation/snps_and_go.rb +1 -1
- data/lib/rbbt/mutation/transFIC.rb +97 -0
- data/lib/rbbt/sources/kegg.rb +38 -39
- data/lib/rbbt/sources/pharmagkb.rb +1 -1
- data/lib/rbbt/sources/pina.rb +26 -0
- data/lib/rbbt/sources/string.rb +19 -0
- data/share/install/Pina/Rakefile +2 -2
- data/share/install/STRING/Rakefile +1 -1
- data/share/install/software/OncodriveFM +13 -0
- data/test/rbbt/mutation/test_mutation_assessor.rb +1 -5
- data/test/rbbt/mutation/test_oncodriveFM.rb +13 -0
- data/test/rbbt/mutation/test_polyphen.rb +5 -3
- data/test/rbbt/mutation/test_transFIC.rb +14 -0
- data/test/rbbt/sources/test_kegg.rb +20 -0
- data/test/test_helper.rb +0 -3
- metadata +42 -56
- data/lib/rbbt/sources/hprd.rb +0 -6
- data/lib/rbbt/sources/reactome.rb +0 -6
- data/share/install/HPRD/Rakefile +0 -15
- data/share/install/Reactome/Rakefile +0 -36
@@ -5,12 +5,13 @@ require 'digest/md5'
|
|
5
5
|
module MutationAssessor
|
6
6
|
|
7
7
|
class NotDone < StandardError; end
|
8
|
-
URL="http://mutationassessor.org
|
8
|
+
URL="http://mutationassessor.org"
|
9
9
|
ASTERISK = "*"[0]
|
10
10
|
|
11
11
|
# mutations is a hash of genes in Uniprot protein accession pointing to lists
|
12
12
|
# of aminoacid substitutions
|
13
13
|
def self.predict(mutations)
|
14
|
+
return TSV.setup({}, :header_hash => "", :type => :list) if mutations.empty? or mutations.nil?
|
14
15
|
vars = mutations.collect{|gene, list|
|
15
16
|
list = [list] unless Array === list
|
16
17
|
list.collect do |mut|
|
@@ -36,12 +37,11 @@ module MutationAssessor
|
|
36
37
|
doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => nocache))
|
37
38
|
end
|
38
39
|
|
39
|
-
textareas = doc.css('
|
40
|
+
textareas = doc.css('p')
|
40
41
|
|
41
42
|
if textareas.empty?
|
42
|
-
|
43
|
-
|
44
|
-
puts
|
43
|
+
Log.debug "No text area"
|
44
|
+
Log.debug doc.to_s
|
45
45
|
raise NotDone, "No text aread found in response HTML"
|
46
46
|
end
|
47
47
|
|
@@ -70,11 +70,11 @@ module MutationAssessor
|
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
73
|
-
if result.empty?
|
73
|
+
if result.empty? and mutations.any?
|
74
74
|
tmp = TmpFile.tmp_file
|
75
75
|
html = tmp + ".html"
|
76
76
|
variants = tmp + ".list"
|
77
|
-
Open.write(
|
77
|
+
Open.write(html, doc.content)
|
78
78
|
Open.write(variants, post_data )
|
79
79
|
raise "Result empty. Possible error. html in #{ html }, variants in #{variants}"
|
80
80
|
end
|
@@ -82,22 +82,34 @@ module MutationAssessor
|
|
82
82
|
result.sub! /^\t/, ''
|
83
83
|
result.gsub! /\n\s*\d+\s*\t/s, "\n"
|
84
84
|
|
85
|
+
Log.medium "Mutation Assessor DONE."
|
86
|
+
|
85
87
|
if result.empty?
|
86
88
|
TSV.setup({}, :header_hash => "", :type => :list)
|
87
89
|
else
|
88
|
-
TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
|
90
|
+
res = TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
|
91
|
+
res = res.slice((res.fields - ["Mapping issue"]))
|
92
|
+
res
|
89
93
|
end
|
90
94
|
end
|
91
95
|
|
92
|
-
def self.chunked_predict(mutations)
|
93
|
-
|
96
|
+
# Runs predict over +mutations+ in batches and merges the per-batch results.
#
# mutations:: hash of protein identifiers pointing to one amino-acid
#             substitution or to a list of them.
# max::       number of (protein, substitution) pairs used to work out how
#             many batches to issue.
#
# Returns the result of the first predict call merged with the results of the
# following ones; nil when there are no batches to iterate over.
def self.chunked_predict(mutations, max = 1000)
  # Flatten to [protein, substitution] pairs so that batches are sized by
  # number of mutations and not by number of proteins.
  pairs = mutations.collect do |protein, changes|
    changes = [changes] unless Array === changes
    changes.collect { |change| [protein, change] }
  end.flatten(1)

  chunks = (pairs.length.to_f / max).ceil

  Log.debug("Mutation Assessor ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
  num = 1
  Misc.divide(pairs, chunks).inject(nil) do |acc, batch|
    Log.debug("Mutation Assessor ran with #{chunks} chunks: chunk #{num}") if chunks > 1

    # Rebuild the protein => [substitutions] hash for this batch; pairs
    # without a protein are dropped.
    grouped = {}
    batch.each do |protein, change|
      next if protein.nil?
      (grouped[protein] ||= []) << change
    end

    prediction = predict(grouped)
    num += 1
    acc.nil? ? prediction : TSV.setup(acc.merge(prediction))
  end
end
|
@@ -131,19 +143,15 @@ module MutationAssessor
|
|
131
143
|
|
132
144
|
data.sort!
|
133
145
|
|
134
|
-
|
135
146
|
predictions = {}
|
136
147
|
predict(data).each{|uni_acc, values|
|
137
148
|
protein, mutation = uni_acc.split(/\s+/)
|
138
149
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
predictions[protein] ||= {}
|
143
|
-
predictions[protein][mutation] = pred
|
144
|
-
end
|
150
|
+
pred = values["Func. Impact"]
|
151
|
+
predictions[protein] ||= {}
|
152
|
+
predictions[protein][mutation] = pred
|
145
153
|
}
|
146
|
-
|
154
|
+
|
147
155
|
uni_acc_pos = tsv.identify_field "UniProt/SwissProt ID"
|
148
156
|
protein_field = tsv.identify_field "Protein Mutation"
|
149
157
|
|
@@ -169,11 +177,11 @@ module MutationAssessor
|
|
169
177
|
"No Prediction"
|
170
178
|
else
|
171
179
|
list = []
|
172
|
-
|
173
|
-
if
|
180
|
+
pred = predictions[uni_acc][mutation] if predictions.include? uni_acc
|
181
|
+
if pred.nil?
|
174
182
|
"No Prediction"
|
175
183
|
else
|
176
|
-
|
184
|
+
pred
|
177
185
|
end
|
178
186
|
end
|
179
187
|
res
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/tsv'
|
4
|
+
require 'digest/md5'
|
5
|
+
require 'rbbt/sources/organism'
|
6
|
+
|
7
|
+
module OncodriveFM
|
8
|
+
|
9
|
+
Rbbt.claim Rbbt.software.opt.OncodriveFM, :install, Rbbt.share.install.software.OncodriveFM.find
|
10
|
+
|
11
|
+
|
12
|
+
# Builds an OncodriveFM mutation file for +cohort+, runs the OncodriveFM
# pipeline on it and returns the resulting gene table.
#
# Isoforms whose consequence is "MISS-SENSE" are scored through the MutEval
# jobs :mutation_assessor, :sift and :polyphen. For every genotype, mutation
# and affected gene one tab-separated row is written: gene, SIFT score,
# PolyPhen score, Mutation Assessor score, sample. When any isoform of the gene
# is truncated the three scores are replaced by the highest score seen for each
# predictor; missing scores are written as "NA".
#
# cohort:: NOTE(review): presumably a StudyExplorer cohort -- only
#          +metagenotype+, +each+ and the per-genotype +jobname+ are used.
#
# Returns a :list TSV keyed by "Ensembl Gene ID" with fields
# "Associated Gene Name", "Sample count", "p-value" and "unknown".
def self.process_cohort(cohort)
  all_mutated_isoforms = cohort.metagenotype.mutated_isoforms.compact.flatten.uniq
  missense = all_mutated_isoforms.select{|mi| mi.consequence == "MISS-SENSE"}

  run_predictor = lambda do |task|
    MutEval.job(task, "OncodriveFM", :mutations => all_mutated_isoforms.subset(missense)).run
  end

  # Highest numeric value found in +field+; empty and nil cells are ignored.
  max_score = lambda do |tsv, field|
    tsv.slice(field).values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
  end

  # Score of the first of +isoforms+ that has an entry in +tsv+.
  first_score = lambda do |tsv, field, isoforms|
    tsv.values_at(*isoforms).compact.collect{|v| v[field]}.first
  end

  mutation_assessor = run_predictor.call(:mutation_assessor)
  sift              = run_predictor.call(:sift)
  polyphen          = run_predictor.call(:polyphen)

  mutation_assessor_max = max_score.call(mutation_assessor, "Mutation Assessor Score")
  sift_max              = max_score.call(sift, "SIFT Score")
  polyphen_max          = max_score.call(polyphen, "Polyphen Score")

  mutation_file = []
  cohort.each do |genotype|
    sample = genotype.jobname
    genotype.each do |mutation|
      genes = mutation.genes
      next if genes.empty?
      mut_mis = mutation.mutated_isoforms
      next if mut_mis.nil? or mut_mis.empty?

      genes.each do |gene|
        mis = mut_mis.select{|mi| mi.protein and mi.protein.gene == gene}

        if mis.any?{|mi| mi.truncated}
          # Truncating changes get the worst score observed for each predictor.
          ma_score, sift_score, polyphen_score = mutation_assessor_max, sift_max, polyphen_max
        else
          ma_score       = first_score.call(mutation_assessor, "Mutation Assessor Score", mis)
          sift_score     = first_score.call(sift, "SIFT Score", mis)
          polyphen_score = first_score.call(polyphen, "Polyphen Score", mis)
        end

        mutation_file << [gene, sift_score || "NA", polyphen_score || "NA", ma_score || "NA", sample] * "\t"
      end
    end
  end

  TmpFile.with_file(mutation_file * "\n") do |fmuts|
    TmpFile.with_file do |outdir|
      FileUtils.mkdir_p outdir unless File.exist? outdir
      name = "Tumor"

      TmpFile.with_file(config(fmuts, outdir, "[TUMOR]" => name)) do |fconf|
        Log.debug Open.read(fconf)
        CMD.cmd("cd #{Rbbt.software.opt.OncodriveFM.bin.find}; ./pipeline_launcher.pl '#{fconf}'").read
      end

      outfile = File.join(outdir, name + '.fimp')
      # Drop WARNING lines and turn "-" placeholders into empty cells before parsing.
      text = Open.read(outfile).gsub(/WARNING.*?\n/m,'').gsub(/\t-\t/,"\t\t").gsub(/\t-$/,"\t")
      tsv = TSV.open(StringIO.new(text), :type => :list)
      tsv.key_field = "Ensembl Gene ID"
      tsv.fields = ["Associated Gene Name", "Sample count", "p-value", "unknown"]

      tsv
    end
  end
end
|
71
|
+
|
72
|
+
CONFIG_TEMPLATE=<<-EOF
|
73
|
+
###########################################################################################
|
74
|
+
# Input data specific for the tumor under analysis
|
75
|
+
|
76
|
+
#tumor: This name will be used as prefix to name all intermediate and final pipeline files
|
77
|
+
tumor='[TUMOR]'
|
78
|
+
|
79
|
+
#mutfile: File that contains the mutations data of the tumor you want to analyze. Each row corresponds to the mutation of one gene in one sample. Its format should be:
|
80
|
+
#
|
81
|
+
####Ensembl_Gene_ID MA_Zscore CHASM_Zscore Sample_ID
|
82
|
+
mutfile='[MUTFILE]'
|
83
|
+
|
84
|
+
####numFIS: number of functional scores included in the mutations file and used to compute the functional impact bias
|
85
|
+
numFIS='[NUMFIS]'
|
86
|
+
|
87
|
+
###########################################################################################
|
88
|
+
|
89
|
+
###########################################################################################
|
90
|
+
# Common input data (change these only if you have downloaded different info files)
|
91
|
+
|
92
|
+
#genes2gos: File that contains the genes2gos mapping
|
93
|
+
genes2gos='[DATA_DIR]/common/slimgos_distrib/genes2gos'
|
94
|
+
|
95
|
+
#gosdistribs: Directory with the files that contain the distributions of SIFT, PPH2 and MA scores for each slimGOA obtained from 1000genomes.
|
96
|
+
gosdistribs='[DATA_DIR]/common/slimgos_distrib/'
|
97
|
+
|
98
|
+
#genes2symbols: File that contains the genes2symbols mapping obtained from BioMart. Its format should be:
|
99
|
+
#
|
100
|
+
####Ensembl_Gene_ID Gene_Symbol
|
101
|
+
genes2symbols='[DATA_DIR]/common/genes2symbols.txt'
|
102
|
+
|
103
|
+
extrec='NONE'
|
104
|
+
|
105
|
+
#genes2probes: File that contains the genes2probes mapping obtained from BioMart. Its format should be:
|
106
|
+
#
|
107
|
+
####Ensembl_Gene_ID Probe_ID
|
108
|
+
cp='[DATA_DIR]/common/cp.format'
|
109
|
+
|
110
|
+
#genesattr: File that contains genes' longest CDS' lengths obtained from BioMart and genes' basal nsSNVs rates computed from 1000genomes. This are used to assess the statistical significance of genes' mutations recurrence and genes' overmutation rates. Its format should be:
|
111
|
+
#
|
112
|
+
####Ensembl_Gene_ID Longest_CDS_length Basal_nsSNVs_rate
|
113
|
+
genesattr='[DATA_DIR]/common/ensgenes_cds.recurrence'
|
114
|
+
|
115
|
+
#outdir: Directory to write output files
|
116
|
+
outdir='[OUTDIR]'
|
117
|
+
|
118
|
+
#tmpdir: Directory to write intermediate files
|
119
|
+
tmpdir='[TMPDIR]'
|
120
|
+
|
121
|
+
#internal: whether the null distribution will be taken from variants observed in the tumor
|
122
|
+
internal='[INTERNAL]'
|
123
|
+
###########################################################################################
|
124
|
+
EOF
|
125
|
+
|
126
|
+
# Renders CONFIG_TEMPLATE into the text of an OncodriveFM configuration file.
#
# mutfile:: path of the mutation file, substituted for "[MUTFILE]".
# outdir::  output directory, substituted for "[OUTDIR]".
# options:: placeholder => value overrides; any placeholder not given falls
#           back to the defaults listed below.
#
# Creates the "[TMPDIR]" directory when it does not exist yet and returns the
# rendered configuration as a String.
def self.config(mutfile, outdir, options = {})
  options = Misc.add_defaults options,
    "[TUMOR]" => "Tumor",
    "[MUTFILE]" => mutfile,
    "[NUMFIS]" => 3,
    "[DATA_DIR]" => Rbbt.software.opt.OncodriveFM.data.find,
    "[OUTDIR]" => outdir,
    "[TMPDIR]" => Rbbt.tmp.OncodriveFM.find,
    "[INTERNAL]" => 1

  FileUtils.mkdir_p options["[TMPDIR]"] unless File.exist? options["[TMPDIR]"]

  txt = CONFIG_TEMPLATE.dup
  options.each do |key,value|
    # Block form inserts the value verbatim; a plain replacement string would
    # interpret backslash sequences that may appear in paths.
    txt.gsub!(key) { value.to_s }
  end

  txt
end
|
145
|
+
|
146
|
+
end
|
@@ -31,52 +31,81 @@ module Polyphen2
|
|
31
31
|
"_ggi_target_manage" => "Refresh",
|
32
32
|
}
|
33
33
|
|
34
|
-
|
35
|
-
|
34
|
+
# Submits +query+ as a batch job to the PolyPhen-2 web service, polls the job
# list every 5 seconds until the job tagged with this query's MD5 description
# reports a result or an error, and then loads the result table.
#
# Returns nil when the job row reports an error; otherwise a TSV keyed by
# "<accession>:<wt><pos><mt>" built from the "o_aa1", "o_pos" and "o_aa2"
# columns of every result row.
def self.predict(query)
  submit_options = OPTIONS.merge "_ggi_batch" => query

  # The digest doubles as the job description used to find our row later on.
  desc = Digest::MD5.hexdigest(submit_options.inspect)
  submit_options["description"] = desc

  post = lambda do |opts|
    payload = opts.collect{|k,v| [k,v] * "="} * "&"
    Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{payload}'"}, :nocache => true))
  end

  doc = post.call(submit_options)
  sid = doc.css('input[name=sid]').attr('value')
  refresh_options = REFRESH_OPTIONS.merge "sid" => sid

  view_link = nil
  loop do
    doc = post.call(refresh_options)

    rows = doc.css('body > table')[1].css('table')[2].css('tr')
    row = rows.find do |candidate|
      tds = candidate.css('td')
      tds.length == 6 and tds.last.content.strip == desc
    end

    cells = row.css('td')

    # On error leave view_link as nil so the caller gets nil back.
    break if cells[2].content =~ /Error/

    if cells[1].content =~ /Short/
      view_link = cells[1].css('a').attr('href')
      break
    end

    sleep 5
  end

  return nil if view_link.nil?

  tsv = TSV.open Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :merge => true, :fix => Proc.new{|l| l.gsub(/ *\t */, "\t")}
  tsv.fields = tsv.fields.collect{|f| f.strip}
  tsv.key_field = tsv.key_field.strip

  new_tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => tsv.fields)

  tsv.through do |acc, values|
    values.zip_fields.each do |entry|
      pos, wt, mt = entry.values_at "o_pos", "o_aa1", "o_aa2"
      new_tsv["#{acc}:#{wt}#{pos}#{mt}"] = entry
    end
  end

  new_tsv
end
|
89
|
+
|
90
|
+
# Splits the newline-separated +query+ into batches, runs predict on each and
# merges the results.
#
# query:: batch query text, one mutation per line.
# max::   number of lines used to work out how many batches to issue.
#
# predict returns nil when the remote job fails; such batches are logged and
# skipped instead of being merged, so a failing batch no longer aborts the
# whole run. Returns the merged result, or nil when no batch produced one.
def self.chunked_predict(query, max = 1000)
  mutations = query.split("\n")
  chunks = (mutations.length.to_f / max).ceil

  num = 0
  Log.debug("Polyphen2 ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
  Misc.divide(mutations, chunks).inject(nil) do |acc, list|
    num += 1
    Log.debug("Polyphen2 ran with #{chunks} chunks: chunk #{num}") if chunks > 1

    result = predict(list * "\n")

    if result.nil?
      Log.debug("Polyphen2 returned no result for chunk #{num}")
      acc
    elsif acc.nil?
      result
    else
      TSV.setup(acc.merge(result))
    end
  end
end
|
80
109
|
|
81
110
|
end
|
82
111
|
|
data/lib/rbbt/mutation/sift.rb
CHANGED
@@ -9,9 +9,14 @@ module SIFT
|
|
9
9
|
data_str = mutations.collect{|mut| mut.sub(':', ',')}.uniq * "\n"
|
10
10
|
doc = Nokogiri::HTML(Open.read(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"}))
|
11
11
|
|
12
|
+
if doc.to_s.match(/Your computer has exceeded its daily limit/)
|
13
|
+
Open.clean_cache(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"})
|
14
|
+
raise "Daily limit reached"
|
15
|
+
end
|
16
|
+
|
12
17
|
rows = []
|
13
18
|
doc.css('tr').each do |row|
|
14
|
-
rows << row.css('td').collect{|cell| cell.content.strip.sub "
|
19
|
+
rows << row.css('td').collect{|cell| content = cell.content.strip; content.sub(/\s* .*/, "").sub(/[^\w,]*$/,'')}
|
15
20
|
end
|
16
21
|
|
17
22
|
rows.shift
|
@@ -24,12 +29,19 @@ module SIFT
|
|
24
29
|
end
|
25
30
|
end
|
26
31
|
|
27
|
-
def self.chunked_predict(mutations)
|
28
|
-
chunks = mutations.length.to_f /
|
32
|
+
# Runs predict over the unique, sorted +mutations+ in batches and merges the
# results into one table.
#
# mutations:: list of mutated isoforms.
# max::       number of mutations used to work out how many batches to issue.
#
# Returns a :list TSV keyed by "Mutated Isoform".
def self.chunked_predict(mutations, max = 500)
  chunks = (mutations.length.to_f / max).ceil

  Log.debug("SIFT ran with #{chunks} chunks of #{ max } mutations") if chunks > 1

  tsv = TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
  num = 1
  Misc.divide(mutations.uniq.sort, chunks).inject(tsv) do |acc, list|
    Log.debug("SIFT ran with #{chunks} chunks: chunk #{num}") if chunks > 1
    acc = TSV.setup(acc.merge(predict(list)))
    # Advance the counter so each log line reports its own chunk number.
    num += 1
    acc
  end
end
|
35
47
|
|
@@ -14,7 +14,7 @@ module SNPSandGO
|
|
14
14
|
|
15
15
|
res = Open.read(url)
|
16
16
|
|
17
|
-
raise "Error in prediction"
|
17
|
+
raise "Error in prediction: #{$1}" if res =~ /ERROR: (.*)/
|
18
18
|
|
19
19
|
res.match(/Position\s+WT\s+NEW\s+Effect\s+RI\n\s+\d+\s+[A-Z]\s+[A-Z]\s+(\w+)\s+(\d+)/).values_at 1,2
|
20
20
|
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/tsv'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rest_client'
|
7
|
+
require 'rbbt/sources/organism'
|
8
|
+
|
9
|
+
module TransFIC
|
10
|
+
|
11
|
+
class NotDone < StandardError; end
|
12
|
+
|
13
|
+
URL="http://bg.upf.edu/transfic/taskService"
|
14
|
+
ASTERISK = "*"[0]
|
15
|
+
|
16
|
+
# Queries the TransFIC task service for a list of protein mutations.
#
# mutations:: list of "<Ensembl Protein ID>:<change>" strings. Proteins with
#             no "UniProt/SwissProt ID" translation are left out of the query.
#
# The query is uploaded with curl, the returned task URL is polled every 10
# seconds while the response contains "executing", and any response containing
# "Error" is raised (retried through Misc.insist(5)).
#
# Returns a :list TSV keyed by "Protein Mutation" with the six TransFIC score
# and label fields, restricted to the requested +mutations+.
def self.predict(mutations)
  ensp2uni = Organism.identifiers("Hsa").index :target => "UniProt/SwissProt ID", :fields => "Ensembl Protein ID", :persist => true

  searchText = mutations.collect do |mutation|
    protein, change = mutation.split(":")
    uniprot = ensp2uni[protein]
    next if uniprot.nil?
    [uniprot, change] * "\t"
  end.compact.uniq * "\n"

  Log.debug "Querying TransFIC for: #{mutations.length} mutations"

  TmpFile.with_file(searchText) do |file|
    # NOTE(review): test_url is text returned by the remote service and is
    # interpolated into a shell command line below -- confirm it is trusted
    # or switch to an HTTP client that does not go through the shell.
    test_url = CMD.cmd("curl -X PUT -T '#{ file }' '#{ URL }'").read

    fetch = lambda { CMD.cmd("curl -X GET '#{ test_url }'").read }
    check = lambda do |text|
      raise text.split("\n").select{|line| line =~ /Error/}.first if text =~ /Error/
    end

    result = nil

    begin
      Misc.insist(5) do
        result = fetch.call
        check.call(result)

        while result =~ /executing/
          sleep 10
          result = fetch.call
        end

        check.call(result)
      end
    rescue
      # Validation errors are about the submitted file, so log what was sent.
      Log.debug(Open.read(file)) if $!.message =~ /validating/
      raise
    end

    Log.medium("TransFIC DONE")

    tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => %w(siftTransfic siftTransficLabel pph2Transfic pph2TransficLabel maTransfic maTransficLabel), :type => :list)
    result.split("\n").each do |line|
      next if line[0] == "#"[0]

      columns = line.split("\t")
      # Column 4 is the Ensembl protein, 6 the position, 7 the "wt/mt" pair;
      # columns 11-16 hold the TransFIC scores and labels.
      ensp, protein_position, amino_acids = columns.values_at(4, 6, 7)
      residues = amino_acids.split("/")

      change = [residues.first, protein_position, residues.last] * ""
      mutation = [ensp,change] * ":"

      tsv[mutation] = columns.values_at(11, 12, 13, 14, 15, 16)
    end

    tsv.select(mutations)
  end
end
|
68
|
+
|
69
|
+
# Runs predict over +mutations+ in batches and merges the results.
#
# mutations:: list of "<Ensembl Protein ID>:<change>" strings.
# max::       number of mutations used to work out how many batches to issue.
#
# When a batch fails and holds more than two mutations it is retried in two
# halves; smaller failing batches are logged and skipped. A skipped batch keeps
# whatever has been accumulated so far.
#
# Returns the merged result, or nil when no batch produced one.
def self.chunked_predict(mutations, max = 1000)
  chunks = (mutations.length.to_f / max).ceil

  Log.debug("TransFIC ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
  num = 1
  Misc.divide(mutations, chunks).inject(nil) do |acc, list|
    Log.debug("TransFIC ran with #{chunks} chunks: chunk #{num}") if chunks > 1
    num += 1

    begin
      result = predict(list)
    rescue
      if list.length > 2
        Log.debug("Error predicting in transFIC. Divinding list of size #{list.length}")
        result = chunked_predict(list, list.length / 2)
      else
        Log.debug("Error predicting in transFIC. Single error detected")
        result = nil
      end
    end

    # The block value becomes the next accumulator, so a skipped batch must
    # hand back acc itself rather than nil.
    if result.nil?
      acc
    elsif acc.nil?
      result
    else
      TSV.setup(acc.merge(result))
    end
  end
end
|
97
|
+
end
|
data/lib/rbbt/sources/kegg.rb
CHANGED
@@ -10,30 +10,35 @@ module KEGG
|
|
10
10
|
KEGG.claim KEGG.root.find, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
11
11
|
|
12
12
|
def self.names
|
13
|
-
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single
|
13
|
+
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.descriptions
|
17
|
-
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single
|
17
|
+
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
|
18
18
|
end
|
19
19
|
|
20
20
|
|
21
21
|
def self.index2genes
|
22
|
-
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
|
22
|
+
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
|
23
23
|
end
|
24
24
|
|
25
25
|
def self.index2ens
|
26
|
-
@@index2ens ||= KEGG.identifiers.index(:persist => true)
|
26
|
+
@@index2ens ||= KEGG.identifiers.index(:persist => true)
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.index2kegg
|
30
|
-
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
|
30
|
+
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
|
31
31
|
end
|
32
32
|
|
33
33
|
def self.id2name(id)
|
34
34
|
names[id]
|
35
35
|
end
|
36
36
|
|
37
|
+
# Returns the ids of all pathways whose name starts with +name+, ignoring
# case. A nil +name+ yields an empty list and pathways without a name are
# skipped; errors raised while loading the names table are no longer hidden.
def self.name2id(name)
  return [] if name.nil?

  prefix = name.downcase
  names.select{|id,n| not n.nil? and n.downcase.index(prefix) == 0}.collect{|id,n| id}
end
|
40
|
+
|
41
|
+
|
37
42
|
def self.description(id)
|
38
43
|
descriptions[id]
|
39
44
|
end
|
@@ -60,6 +65,7 @@ if defined? Entity
|
|
60
65
|
name = KEGG.id2name(self)
|
61
66
|
name.sub(/ - Homo.*/,'') unless name.nil?
|
62
67
|
end
|
68
|
+
persist :name
|
63
69
|
|
64
70
|
property :description => :single2array do
|
65
71
|
KEGG.description(self)
|
@@ -67,9 +73,10 @@ if defined? Entity
|
|
67
73
|
|
68
74
|
property :genes => :array2single do |*args|
|
69
75
|
organism = args.first || self.organism
|
70
|
-
|
71
|
-
each{|
|
76
|
+
KEGG.index2genes.values_at(*self).
|
77
|
+
each{|gene| gene.organism = organism if gene.respond_to? :organism }
|
72
78
|
end
|
79
|
+
persist :genes
|
73
80
|
end
|
74
81
|
|
75
82
|
if defined? Gene and Entity === Gene
|
@@ -85,50 +92,42 @@ if defined? Entity
|
|
85
92
|
end
|
86
93
|
end
|
87
94
|
|
88
|
-
def
|
89
|
-
return self
|
95
|
+
def from_kegg
|
96
|
+
return self unless format == "KEGG Gene ID"
|
90
97
|
if Array === self
|
91
|
-
KEGG.index2ens.values_at(*self)
|
98
|
+
Gene.setup KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism
|
92
99
|
else
|
93
|
-
KEGG.index2ens[self]
|
100
|
+
Gene.setup KEGG.index2ens[self], "Ensembl Gene ID", organism
|
94
101
|
end
|
95
102
|
end
|
96
103
|
|
97
|
-
def
|
98
|
-
|
99
|
-
|
104
|
+
def self.gene_kegg_pathway_index
|
105
|
+
@@gene_kegg_pathway_index ||=
|
106
|
+
KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
|
100
107
|
end
|
101
108
|
|
102
|
-
property :
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
new = _to(new_format)
|
115
|
-
new.each_with_index do |n,i|
|
116
|
-
c = self.annotated_array_clean_get_brackets(i)
|
117
|
-
if c.nil? or n.nil?
|
118
|
-
self[i] = nil
|
119
|
-
else
|
120
|
-
c.replace n
|
121
|
-
end
|
109
|
+
property :to => :array2single do |new_format|
|
110
|
+
case
|
111
|
+
when format == new_format
|
112
|
+
self
|
113
|
+
when format == "KEGG Gene ID"
|
114
|
+
ensembl = from_kegg.clean_annotations
|
115
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.values_at(*ensembl), new_format, organism)
|
116
|
+
when new_format == "KEGG Gene ID"
|
117
|
+
to_kegg
|
118
|
+
else
|
119
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
122
120
|
end
|
123
121
|
end
|
122
|
+
persist :to
|
124
123
|
|
125
|
-
property :to => :array2single do |new_format|
|
126
|
-
|
127
|
-
|
128
|
-
end
|
124
|
+
#property :to => :array2single do |new_format|
|
125
|
+
# return self if format == new_format
|
126
|
+
# to!(new_format).collect!{|v| Array === v ? v.first : v}
|
127
|
+
#end
|
129
128
|
|
130
129
|
property :kegg_pathways => :array2single do
|
131
|
-
@kegg_pathways ||=
|
130
|
+
@kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
|
132
131
|
each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
|
133
132
|
end
|
134
133
|
end
|
data/lib/rbbt/sources/pina.rb
CHANGED
@@ -7,3 +7,29 @@ module Pina
|
|
7
7
|
|
8
8
|
Pina.claim Pina.root.find, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
|
9
9
|
end
|
10
|
+
|
11
|
+
if defined? Entity and defined? Gene and Entity === Gene
  require 'rbbt/entity/gene'
  require 'rbbt/entity/interactor'
  require 'rbbt/sources/PSI_MI'

  module Gene
    # For every gene, the Pina interaction partners of its UniProt/SwissProt
    # accessions. Each partner is set up as an Interactor carrying the
    # ";;"-separated detection methods (PSI_MITerm) and articles (PMID) found
    # in the Pina table; the per-gene lists and the overall result are set up
    # as Gene lists in "UniProt/SwissProt Accession" format.
    property :pina_interactors => :array2single do
      ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
      pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)

      per_gene = self.ensembl.collect do |ens|
        accessions = ens2uniprot[ens]
        entries = pina.values_at(*accessions).compact

        partners = entries.collect do |entry|
          Misc.zip_fields(entry).collect do |partner, detection, articles|
            Interactor.setup(partner, PSI_MITerm.setup(detection.split(";;")), PMID.setup(articles.split(";;")))
          end
        end

        Gene.setup(partners.flatten.uniq, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
      end

      Gene.setup(per_gene, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
    end
  end
end
|
35
|
+
|
data/lib/rbbt/sources/string.rb
CHANGED
@@ -7,3 +7,22 @@ module STRING
|
|
7
7
|
|
8
8
|
STRING.claim STRING.root.find, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
|
9
9
|
end
|
10
|
+
|
11
|
+
if defined? Entity and defined? Gene and Entity === Gene
  module Gene
    # For every gene, the genes whose proteins interact in STRING with any of
    # its proteins with a score above the threshold (first argument, 800 when
    # omitted).
    property :string_interactors => :array2single do |*args|
      threshold = args.first || 800
      string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)

      all = self.ensembl.collect do |gene|
        interactors = gene.proteins.collect do |protein|
          # Proteins missing from the table contribute no partners.
          scored = Misc.zip_fields(string[protein] || [[],[]])
          scored.select{|partner, score| score.to_i > threshold}.collect{|partner, score| partner}
        end.compact.flatten.uniq

        Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
      end

      # Carry the annotations of the first non-nil per-gene result over to the
      # enclosing list.
      template = all.compact.first
      template.annotate all if Annotated === template

      all
    end
    #persist :_ary_string_interactors
  end
end
|
28
|
+
|
data/share/install/Pina/Rakefile
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
2
|
|
3
|
-
define_source_tasks "Homo sapiens-
|
3
|
+
define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
|
4
4
|
|
5
|
-
process_tsv :protein_protein, 'Homo sapiens-
|
5
|
+
process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
|
6
6
|
:key => 0,
|
7
7
|
:fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
|
8
8
|
:fields => [1,6,8],
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
2
|
|
3
|
-
define_source_tasks "protein_protein" => "http://string-db.org
|
3
|
+
define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.0.txt.gz"
|
4
4
|
|
5
5
|
process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
|
6
6
|
headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
|
@@ -5,7 +5,7 @@ class TestMutationAssessor < Test::Unit::TestCase
|
|
5
5
|
|
6
6
|
def test_predict_aminoacid_mutation
|
7
7
|
mutations = {
|
8
|
-
"EGFR_HUMAN" => %w(
|
8
|
+
"EGFR_HUMAN" => %w(R521E)
|
9
9
|
}
|
10
10
|
|
11
11
|
assert_equal 1, MutationAssessor.predict(mutations).length
|
@@ -27,9 +27,5 @@ class TestMutationAssessor < Test::Unit::TestCase
|
|
27
27
|
|
28
28
|
assert(MutationAssessor.chunked_predict(mutations).include? "EGFR_HUMAN R521K")
|
29
29
|
end
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
30
|
end
|
35
31
|
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/mutation/oncodriveFM'
|
3
|
+
|
4
|
+
class TestOncodriveFM < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_CLL
|
7
|
+
require 'rbbt/workflow'
|
8
|
+
Workflow.require_workflow "StudyExplorer"
|
9
|
+
s = Study.setup("CLL")
|
10
|
+
puts OncodriveFM.process_cohort(s.cohort).select("p-value"){|v| not v.empty? and v.to_f < 0.05}
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helpe
|
|
2
2
|
require 'rbbt/mutation/polyphen'
|
3
3
|
|
4
4
|
class TestPolyphen2 < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_predict_disease
|
6
6
|
accession = "A6NFZ4"
|
7
7
|
mutation = "Y34D"
|
8
8
|
|
@@ -11,10 +11,12 @@ class TestPolyphen2 < Test::Unit::TestCase
|
|
11
11
|
|
12
12
|
# Submits a one-line batch query and checks that both predict and
# chunked_predict report "probably damaging" for A6NFZ4:Y34D.
def test_batch
  query =<<-EOF
A6NFZ4 34 Y D
  EOF

  assert_equal "probably damaging", Polyphen2::Batch.predict(query)["A6NFZ4:Y34D"]["prediction"]
  assert_equal "probably damaging", Polyphen2::Batch.chunked_predict(query)["A6NFZ4:Y34D"]["prediction"]
end
|
19
21
|
end
|
20
22
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/mutation/transFIC'
|
3
|
+
|
4
|
+
class TestTransFIC < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_predict_aminoacid_mutation
|
7
|
+
mutations = [
|
8
|
+
"ENSP00000275493:R521K"
|
9
|
+
]
|
10
|
+
puts TransFIC.predict(mutations)
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/entity/gene'
|
5
|
+
require 'rbbt/sources/kegg'
|
6
|
+
|
7
|
+
class TestKEGG < Test::Unit::TestCase
|
8
|
+
def test_kegg_gene
|
9
|
+
organism = "Hsa"
|
10
|
+
gene = Gene.setup "TP53", "Associated Gene Name", organism
|
11
|
+
|
12
|
+
assert_equal gene.organism, gene.to_kegg.from_kegg.organism
|
13
|
+
assert_equal "KEGG Gene ID", gene.to_kegg.format
|
14
|
+
assert_equal organism, gene.to_kegg.organism
|
15
|
+
assert_equal gene.ensembl, gene.to_kegg.ensembl
|
16
|
+
assert_equal gene.name, gene.to_kegg.ensembl.name
|
17
|
+
assert_equal gene.to_kegg.ensembl.name, gene.to_kegg.name
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,64 +1,55 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 0
|
10
|
-
version: 1.0.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Miguel Vazquez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rbbt-util
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 63
|
30
|
-
segments:
|
31
|
-
- 4
|
32
|
-
- 0
|
33
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
34
21
|
version: 4.0.0
|
35
22
|
type: :runtime
|
36
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 4.0.0
|
37
30
|
description: Pharmaco-genomics related data sources
|
38
31
|
email: miguel.vazquez@fdi.ucm.es
|
39
32
|
executables: []
|
40
|
-
|
41
33
|
extensions: []
|
42
|
-
|
43
|
-
extra_rdoc_files:
|
34
|
+
extra_rdoc_files:
|
44
35
|
- LICENSE
|
45
|
-
files:
|
36
|
+
files:
|
46
37
|
- LICENSE
|
47
38
|
- lib/phgx.rb
|
48
39
|
- lib/rbbt/mutation/fireDB.rb
|
49
40
|
- lib/rbbt/mutation/mutation_assessor.rb
|
41
|
+
- lib/rbbt/mutation/oncodriveFM.rb
|
50
42
|
- lib/rbbt/mutation/polyphen.rb
|
51
43
|
- lib/rbbt/mutation/sift.rb
|
52
44
|
- lib/rbbt/mutation/snps_and_go.rb
|
45
|
+
- lib/rbbt/mutation/transFIC.rb
|
53
46
|
- lib/rbbt/sources/biogrid.rb
|
54
47
|
- lib/rbbt/sources/cancer.rb
|
55
48
|
- lib/rbbt/sources/dbsnp.rb
|
56
|
-
- lib/rbbt/sources/hprd.rb
|
57
49
|
- lib/rbbt/sources/kegg.rb
|
58
50
|
- lib/rbbt/sources/matador.rb
|
59
51
|
- lib/rbbt/sources/pharmagkb.rb
|
60
52
|
- lib/rbbt/sources/pina.rb
|
61
|
-
- lib/rbbt/sources/reactome.rb
|
62
53
|
- lib/rbbt/sources/stitch.rb
|
63
54
|
- lib/rbbt/sources/string.rb
|
64
55
|
- share/Cancer/anais_annotations
|
@@ -66,68 +57,63 @@ files:
|
|
66
57
|
- share/Cancer/cancer_genes.tsv
|
67
58
|
- share/install/Biogrid/Rakefile
|
68
59
|
- share/install/DBSNP/Rakefile
|
69
|
-
- share/install/HPRD/Rakefile
|
70
60
|
- share/install/KEGG/Rakefile
|
71
61
|
- share/install/Matador/Rakefile
|
72
62
|
- share/install/NCI/Rakefile
|
73
63
|
- share/install/PharmaGKB/Rakefile
|
74
64
|
- share/install/Pina/Rakefile
|
75
|
-
- share/install/Reactome/Rakefile
|
76
65
|
- share/install/STITCH/Rakefile
|
77
66
|
- share/install/STRING/Rakefile
|
78
67
|
- share/install/lib/rake_helper.rb
|
68
|
+
- share/install/software/OncodriveFM
|
79
69
|
- test/rbbt/sources/test_matador.rb
|
80
70
|
- test/rbbt/sources/test_pharmagkb.rb
|
81
71
|
- test/rbbt/sources/test_stitch.rb
|
82
72
|
- test/rbbt/sources/test_cancer.rb
|
73
|
+
- test/rbbt/sources/test_kegg.rb
|
83
74
|
- test/rbbt/mutation/test_snps_and_go.rb
|
84
75
|
- test/rbbt/mutation/test_fireDB.rb
|
85
76
|
- test/rbbt/mutation/test_sift.rb
|
86
77
|
- test/rbbt/mutation/test_polyphen.rb
|
87
78
|
- test/rbbt/mutation/test_mutation_assessor.rb
|
79
|
+
- test/rbbt/mutation/test_oncodriveFM.rb
|
80
|
+
- test/rbbt/mutation/test_transFIC.rb
|
88
81
|
- test/test_helper.rb
|
89
|
-
has_rdoc: true
|
90
82
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
91
83
|
licenses: []
|
92
|
-
|
93
84
|
post_install_message:
|
94
85
|
rdoc_options: []
|
95
|
-
|
96
|
-
require_paths:
|
86
|
+
require_paths:
|
97
87
|
- lib
|
98
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
89
|
none: false
|
100
|
-
requirements:
|
101
|
-
- -
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
|
104
|
-
|
105
|
-
- 0
|
106
|
-
version: "0"
|
107
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
95
|
none: false
|
109
|
-
requirements:
|
110
|
-
- -
|
111
|
-
- !ruby/object:Gem::Version
|
112
|
-
|
113
|
-
segments:
|
114
|
-
- 0
|
115
|
-
version: "0"
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
116
100
|
requirements: []
|
117
|
-
|
118
101
|
rubyforge_project:
|
119
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.8.24
|
120
103
|
signing_key:
|
121
104
|
specification_version: 3
|
122
105
|
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
123
|
-
test_files:
|
106
|
+
test_files:
|
124
107
|
- test/rbbt/sources/test_matador.rb
|
125
108
|
- test/rbbt/sources/test_pharmagkb.rb
|
126
109
|
- test/rbbt/sources/test_stitch.rb
|
127
110
|
- test/rbbt/sources/test_cancer.rb
|
111
|
+
- test/rbbt/sources/test_kegg.rb
|
128
112
|
- test/rbbt/mutation/test_snps_and_go.rb
|
129
113
|
- test/rbbt/mutation/test_fireDB.rb
|
130
114
|
- test/rbbt/mutation/test_sift.rb
|
131
115
|
- test/rbbt/mutation/test_polyphen.rb
|
132
116
|
- test/rbbt/mutation/test_mutation_assessor.rb
|
117
|
+
- test/rbbt/mutation/test_oncodriveFM.rb
|
118
|
+
- test/rbbt/mutation/test_transFIC.rb
|
133
119
|
- test/test_helper.rb
|
data/lib/rbbt/sources/hprd.rb
DELETED
data/share/install/HPRD/Rakefile
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
|
4
|
-
file :protein_protein do |t|
|
5
|
-
begin
|
6
|
-
tsv = PhGx.share.hprd["BINARY_PROTEIN_PROTEIN_INTERACTIONS.txt"].tsv :merge => true
|
7
|
-
rescue
|
8
|
-
raise "File BINARY_PROTEIN_PROTEIN_INTERACTIONS.txt not found in 'share/hprd', download manually from http://www.hprd.org/"
|
9
|
-
end
|
10
|
-
tsv.key_field = "Associated Gene Name 1"
|
11
|
-
tsv.fields = ["HPRD id 1","RefSeq Protein ID 1","Associated Gene Name 2","HPRD id 2","RefSeq Protein ID 2", "Experiment type", "PMID"]
|
12
|
-
tsv.namespace = "Hsa"
|
13
|
-
|
14
|
-
Open.write(t.name, tsv.to_s)
|
15
|
-
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "human_ppi" => "http://www.genomeknowledge.org/download/current/homo_sapiens.interactions.txt.gz",
|
4
|
-
"protein_pathway" => "http://www.genomeknowledge.org/download/current/uniprot_2_pathways.stid.txt",
|
5
|
-
"pathway_genesets" => "http://www.genomeknowledge.org/download/current/ReactomePathways.gmt.zip"
|
6
|
-
|
7
|
-
process_tsv :protein_protein, 'human_ppi',
|
8
|
-
:key => 0,
|
9
|
-
:fix => lambda{|l| l.gsub(/\t[a-z ]+:/i,"\t").gsub(/^[a-z ]+:/i,'')},
|
10
|
-
:fields => [3,6,7,8],
|
11
|
-
:header_hash => "#",
|
12
|
-
:merge => true,
|
13
|
-
:keep_empty => true do
|
14
|
-
|
15
|
-
headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Interaction Type', 'Reactions Involved', 'Interaction PMIDS']
|
16
|
-
end
|
17
|
-
|
18
|
-
process_tsv :protein_pathway, 'protein_pathway',
|
19
|
-
:key => 0,
|
20
|
-
:fix => lambda{|l| l.gsub(/\t[a-z ]+:/i,"\t").gsub(/^[a-z ]+:/i,'')},
|
21
|
-
:fields => [1,2],
|
22
|
-
:header_hash => "#",
|
23
|
-
:merge => true,
|
24
|
-
:keep_empty => true do
|
25
|
-
|
26
|
-
headers ['UniProt/SwissProt Accession', 'Pathway ID', 'Pathway Description']
|
27
|
-
end
|
28
|
-
|
29
|
-
process_tsv :pathway_genesets, 'pathway_genesets',
|
30
|
-
:key => 0,
|
31
|
-
:fix => lambda{|l| parts = l.split("\t"); [parts[0], parts[2..-1] * "|"] * "\t"},
|
32
|
-
:keep_empty => true do
|
33
|
-
|
34
|
-
headers ['Reactome Pathway Name', 'Associated Gene Name']
|
35
|
-
end
|
36
|
-
|