rbbt-phgx 0.3.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/mutation/mutation_assessor.rb +19 -12
- data/lib/rbbt/mutation/sift.rb +15 -4
- data/lib/rbbt/sources/kegg.rb +94 -15
- data/lib/rbbt/sources/matador.rb +0 -1
- data/lib/rbbt/sources/string.rb +0 -1
- data/share/Cancer/cancer_genes.tsv +1 -1
- data/share/install/Matador/Rakefile +1 -2
- data/test/rbbt/mutation/test_mutation_assessor.rb +3 -4
- data/test/rbbt/mutation/test_sift.rb +10 -2
- metadata +5 -7
- data/lib/rbbt/sources/nci.rb +0 -6
- data/lib/rbbt/sources/sift.rb +0 -5
@@ -28,13 +28,23 @@ module MutationAssessor
|
|
28
28
|
|
29
29
|
Log.debug "Querying Mutation Assessor for: #{vars.split(/\n/).length}"
|
30
30
|
tries = 0
|
31
|
+
nocache = false
|
31
32
|
begin
|
32
33
|
doc = nil
|
33
34
|
TmpFile.with_file(post_data) do |post_file|
|
34
|
-
|
35
|
+
Log.medium "Updating cache:" if nocache == :update
|
36
|
+
doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => nocache))
|
35
37
|
end
|
36
38
|
|
37
39
|
textareas = doc.css('textarea')
|
40
|
+
|
41
|
+
if textareas.empty?
|
42
|
+
puts "No text area"
|
43
|
+
puts doc
|
44
|
+
puts
|
45
|
+
raise NotDone, "No text aread found in response HTML"
|
46
|
+
end
|
47
|
+
|
38
48
|
result = textareas.last.content
|
39
49
|
|
40
50
|
if result =~ /Cannot parse variant/
|
@@ -44,14 +54,16 @@ module MutationAssessor
|
|
44
54
|
raise "Cannot parse variants. Variants in file #{ variants }"
|
45
55
|
end
|
46
56
|
|
47
|
-
raise NotDone, "Not done" if result =~ /\t
|
57
|
+
raise NotDone, "Not done" if result =~ /\t\[sent\]\t/
|
48
58
|
rescue NotDone
|
49
|
-
Log.debug "Mutation Assessor not done, waiting:"
|
50
|
-
Log.debug result
|
51
|
-
|
52
|
-
sleep 30
|
53
59
|
tries += 1
|
60
|
+
nocache = :update
|
61
|
+
|
62
|
+
Log.medium "Mutation Assessor not done, waiting:"
|
63
|
+
sleep 30
|
64
|
+
|
54
65
|
if tries < 10
|
66
|
+
Log.medium "Retrying mutation assessor"
|
55
67
|
retry
|
56
68
|
else
|
57
69
|
raise "Error processing Mutation Assessor response"
|
@@ -80,7 +92,7 @@ module MutationAssessor
|
|
80
92
|
def self.chunked_predict(mutations)
|
81
93
|
chunks = mutations.length.to_f / 1000
|
82
94
|
chunks = chunks.ceil
|
83
|
-
Misc.divide(mutations.
|
95
|
+
Misc.divide(mutations.sort_by{|m| m * ":"}, chunks).inject(nil) do |acc, list|
|
84
96
|
if acc.nil?
|
85
97
|
acc = predict(list)
|
86
98
|
else
|
@@ -145,14 +157,9 @@ module MutationAssessor
|
|
145
157
|
|
146
158
|
next if uni_accs.compact.reject{|v| v.nil? or v.empty?}.empty?
|
147
159
|
|
148
|
-
ddd uni_accs
|
149
|
-
|
150
160
|
mutations = values[protein_field]
|
151
|
-
ddd mutations
|
152
161
|
|
153
162
|
uni_accs.zip(mutations).collect do |uni_acc,mutation|
|
154
|
-
ddd uni_acc
|
155
|
-
ddd mutation
|
156
163
|
res = case
|
157
164
|
when (mutation.nil? or mutation.empty?)
|
158
165
|
"No Prediction"
|
data/lib/rbbt/mutation/sift.rb
CHANGED
@@ -6,7 +6,8 @@ module SIFT
|
|
6
6
|
URL_ENSP="http://sift.jcvi.org/sift-bin/retrieve_enst.pl"
|
7
7
|
|
8
8
|
def self.predict(mutations)
|
9
|
-
|
9
|
+
data_str = mutations.collect{|mut| mut.sub(':', ',')}.uniq * "\n"
|
10
|
+
doc = Nokogiri::HTML(Open.read(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"}))
|
10
11
|
|
11
12
|
rows = []
|
12
13
|
doc.css('tr').each do |row|
|
@@ -15,10 +16,20 @@ module SIFT
|
|
15
16
|
|
16
17
|
rows.shift
|
17
18
|
|
18
|
-
if
|
19
|
-
rows
|
19
|
+
if rows.any?
|
20
|
+
TSV.open StringIO.new(rows.collect{|row| row.collect{|v| v.sub(/(ENSP\d+),/,'\1:')} * "\t"} * "\n"), :list,
|
21
|
+
:key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"]
|
20
22
|
else
|
21
|
-
|
23
|
+
TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.chunked_predict(mutations)
|
28
|
+
chunks = mutations.length.to_f / 100
|
29
|
+
chunks = chunks.ceil
|
30
|
+
tsv = TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
|
31
|
+
Misc.divide(mutations.uniq.sort, chunks).inject(tsv) do |acc, list|
|
32
|
+
acc = TSV.setup(acc.merge(predict(list)))
|
22
33
|
end
|
23
34
|
end
|
24
35
|
|
data/lib/rbbt/sources/kegg.rb
CHANGED
@@ -6,6 +6,7 @@ module KEGG
|
|
6
6
|
self.pkgdir = "phgx"
|
7
7
|
self.subdir = "share/kegg"
|
8
8
|
|
9
|
+
|
9
10
|
KEGG.claim KEGG.root.find, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
10
11
|
|
11
12
|
def self.names
|
@@ -13,16 +14,20 @@ module KEGG
|
|
13
14
|
end
|
14
15
|
|
15
16
|
def self.descriptions
|
16
|
-
@@descriptions ||= KEGG.pathways.tsv
|
17
|
+
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single).tap{|o| o.unnamed = true}
|
17
18
|
end
|
18
19
|
|
19
20
|
|
21
|
+
def self.index2genes
|
22
|
+
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true).tap{|o| o.unnamed = true}
|
23
|
+
end
|
24
|
+
|
20
25
|
def self.index2ens
|
21
|
-
@@index2ens ||= KEGG.identifiers.index
|
26
|
+
@@index2ens ||= KEGG.identifiers.index(:persist => true).tap{|o| o.unnamed = true}
|
22
27
|
end
|
23
28
|
|
24
29
|
def self.index2kegg
|
25
|
-
@@index2kegg ||= KEGG.identifiers.index
|
30
|
+
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true).tap{|o| o.unnamed = true}
|
26
31
|
end
|
27
32
|
|
28
33
|
def self.id2name(id)
|
@@ -34,24 +39,98 @@ module KEGG
|
|
34
39
|
end
|
35
40
|
end
|
36
41
|
|
37
|
-
|
42
|
+
if defined? Entity
|
43
|
+
|
44
|
+
module KeggPathway
|
45
|
+
extend Entity
|
46
|
+
self.format = "KEGG Pathway ID"
|
47
|
+
|
48
|
+
self.annotation :organism
|
49
|
+
|
50
|
+
def self.filter(query, field = nil, options = nil, entity = nil)
|
51
|
+
return true if query == entity
|
38
52
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
53
|
+
return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
|
54
|
+
|
55
|
+
false
|
56
|
+
end
|
57
|
+
|
58
|
+
property :name => :single2array do
|
59
|
+
return nil if self.nil?
|
60
|
+
name = KEGG.id2name(self)
|
61
|
+
name.sub(/ - Homo.*/,'') unless name.nil?
|
62
|
+
end
|
63
|
+
|
64
|
+
property :description => :single2array do
|
65
|
+
KEGG.description(self)
|
44
66
|
end
|
45
|
-
end
|
46
67
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism)
|
68
|
+
property :genes => :array2single do |*args|
|
69
|
+
organism = args.first || self.organism
|
70
|
+
@genes ||= KEGG.index2genes.values_at(*self).
|
71
|
+
each{|pth| pth.organism = organism if pth.respond_to? :organism }
|
52
72
|
end
|
53
73
|
end
|
54
74
|
|
75
|
+
if defined? Gene and Entity === Gene
|
76
|
+
module Gene
|
77
|
+
self.format = "KEGG Gene ID"
|
78
|
+
|
79
|
+
def to_kegg
|
80
|
+
return self if format == "KEGG Gene ID"
|
81
|
+
if Array === self
|
82
|
+
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
|
83
|
+
else
|
84
|
+
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def _from_kegg
|
89
|
+
return self.clean_annotations unless format == "KEGG Gene ID"
|
90
|
+
if Array === self
|
91
|
+
KEGG.index2ens.values_at(*self)
|
92
|
+
else
|
93
|
+
KEGG.index2ens[self]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def from_kegg
|
98
|
+
return self unless format == "KEGG Gene ID"
|
99
|
+
Gene.setup(_from_kegg, "Ensembl Gene ID", organism)
|
100
|
+
end
|
101
|
+
|
102
|
+
property :_to => :array2single do |new_format|
|
103
|
+
return self if format == new_format
|
104
|
+
list = self._from_kegg
|
105
|
+
|
106
|
+
tsv = Translation.job(:tsv_translate, "", :organism => organism, :genes => list, :format => new_format).exec.tap{|o| o.unnamed = true}
|
55
107
|
|
108
|
+
tsv.values_at(*list)
|
109
|
+
end
|
56
110
|
|
111
|
+
property :to! => :array2single do |new_format|
|
112
|
+
return self if format == new_format
|
113
|
+
|
114
|
+
new = _to(new_format)
|
115
|
+
new.each_with_index do |n,i|
|
116
|
+
c = self.annotated_array_clean_get_brackets(i)
|
117
|
+
if c.nil? or n.nil?
|
118
|
+
self[i] = nil
|
119
|
+
else
|
120
|
+
c.replace n
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
property :to => :array2single do |new_format|
|
126
|
+
return self if format == new_format
|
127
|
+
Gene.setup(_to(new_format), new_format, organism)
|
128
|
+
end
|
129
|
+
|
130
|
+
property :kegg_pathways => :array2single do
|
131
|
+
@kegg_pathways ||= KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true).values_at(*self.to_kegg).
|
132
|
+
each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
57
136
|
end
|
data/lib/rbbt/sources/matador.rb
CHANGED
data/lib/rbbt/sources/string.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
#Associated Gene Name Name GeneID Chr Chr Band Cancer Somatic Mut Cancer Germline Mut Tumour Types (Somatic Mutations) Tumour Types (Germline Mutations) Cancer Syndrome Tissue Type Cancer Molecular Genetics Mutation Type Translocation Partner Other Germline Mut Other Syndrome/Disease
|
2
2
|
ABL1 v-abl Abelson murine leukemia viral oncogene homolog 1 25 9 9q34.1 yes CML, ALL, T-ALL L Dom T, Mis BCR, ETV6, NUP214
|
3
3
|
ABL2 v-abl Abelson murine leukemia viral oncogene homolog 2 27 1 1q24-q25 yes AML L Dom T ETV6
|
4
4
|
ACSL3 acyl-CoA synthetase long-chain family member 3 2181 2 2q36 yes prostate E Dom T ETV1
|
@@ -2,9 +2,8 @@ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
2
|
|
3
3
|
define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"
|
4
4
|
|
5
|
-
|
6
5
|
process_tsv :protein_drug, 'matador',
|
7
|
-
:
|
6
|
+
:key_field => 3,
|
8
7
|
:fix => lambda{|l| l.sub(/9606./,'')},
|
9
8
|
:fields => [1,0,7,8,9,10,11,12],
|
10
9
|
:header_hash => "",
|
@@ -3,16 +3,15 @@ require 'rbbt/mutation/mutation_assessor'
|
|
3
3
|
|
4
4
|
class TestMutationAssessor < Test::Unit::TestCase
|
5
5
|
|
6
|
-
def
|
6
|
+
def test_predict_aminoacid_mutation
|
7
7
|
mutations = {
|
8
8
|
"EGFR_HUMAN" => %w(R521K)
|
9
9
|
}
|
10
10
|
|
11
|
-
puts MutationAssessor.predict(mutations)
|
12
11
|
assert_equal 1, MutationAssessor.predict(mutations).length
|
13
12
|
end
|
14
13
|
|
15
|
-
def
|
14
|
+
def test_predict_aminoacid_mutation_tsv
|
16
15
|
tsv = TSV.setup({"EGFR_HUMAN" => [%w(R521K)]}, :key_field => "UniProt/SwissProt ID", :fields => ["Protein Mutation"], :type => :double)
|
17
16
|
|
18
17
|
assert_equal "neutral", MutationAssessor.add_predictions(tsv).slice("MutationAssessor:Prediction").values.first.flatten.first
|
@@ -26,7 +25,7 @@ class TestMutationAssessor < Test::Unit::TestCase
|
|
26
25
|
"P53_HUMAN" => %w(R21K),
|
27
26
|
}
|
28
27
|
|
29
|
-
|
28
|
+
assert(MutationAssessor.chunked_predict(mutations).include? "EGFR_HUMAN R521K")
|
30
29
|
end
|
31
30
|
|
32
31
|
|
@@ -16,10 +16,18 @@ class TestSIFT < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
assert_equal "TOLERATED", SIFT.predict_aminoacid_mutation_batch( [[accession, mutation]]).first[3]
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
|
+
def test_predict
|
20
21
|
ensp = "ENSP00000224605"
|
21
22
|
mutation = "A63T"
|
22
|
-
assert_equal "TOLERATED", SIFT.predict( [[ensp, mutation]]).first[
|
23
|
+
assert_equal "TOLERATED", SIFT.predict( [[ensp, mutation] * ":"]).values.first["Prediction"]
|
23
24
|
end
|
24
25
|
|
26
|
+
def test_chunked_predict
|
27
|
+
ensp = "ENSP00000224605"
|
28
|
+
mutation = "A63T"
|
29
|
+
assert_equal "TOLERATED", SIFT.chunked_predict( [[ensp, mutation] * ":"] * 2000).values.first["Prediction"]
|
30
|
+
end
|
31
|
+
|
32
|
+
|
25
33
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
7
|
- 1
|
10
|
-
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-01-13 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -56,11 +56,9 @@ files:
|
|
56
56
|
- lib/rbbt/sources/hprd.rb
|
57
57
|
- lib/rbbt/sources/kegg.rb
|
58
58
|
- lib/rbbt/sources/matador.rb
|
59
|
-
- lib/rbbt/sources/nci.rb
|
60
59
|
- lib/rbbt/sources/pharmagkb.rb
|
61
60
|
- lib/rbbt/sources/pina.rb
|
62
61
|
- lib/rbbt/sources/reactome.rb
|
63
|
-
- lib/rbbt/sources/sift.rb
|
64
62
|
- lib/rbbt/sources/stitch.rb
|
65
63
|
- lib/rbbt/sources/string.rb
|
66
64
|
- share/Cancer/anais_annotations
|
data/lib/rbbt/sources/nci.rb
DELETED