rbbt-entities 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/entity.rb +36 -29
- data/lib/rbbt/entity/gene.rb +141 -65
- data/lib/rbbt/entity/genomic_mutation.rb +138 -41
- data/lib/rbbt/entity/genotype.rb +41 -23
- data/lib/rbbt/entity/misc.rb +0 -21
- data/lib/rbbt/entity/mutated_isoform.rb +143 -86
- data/lib/rbbt/entity/pmid.rb +13 -3
- data/lib/rbbt/entity/protein.rb +39 -7
- data/lib/rbbt/entity/transcript.rb +69 -0
- data/test/rbbt/entity/test_gene.rb +1 -1
- data/test/rbbt/entity/test_genomic_mutation.rb +0 -10
- data/test/rbbt/test_entity.rb +101 -0
- metadata +5 -4
data/lib/rbbt/entity/genotype.rb
CHANGED
@@ -27,10 +27,10 @@ module Genotype
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def self.extended(
|
31
|
-
prev_genotype_cohort_extended(
|
30
|
+
def self.extended(cohort)
|
31
|
+
prev_genotype_cohort_extended(cohort) if self.respond_to? :prev_genotype_cohort_extended
|
32
32
|
|
33
|
-
class <<
|
33
|
+
class << cohort
|
34
34
|
attr_accessor :metagenotype
|
35
35
|
|
36
36
|
def jobname
|
@@ -43,24 +43,29 @@ module Genotype
|
|
43
43
|
|
44
44
|
def metagenotype
|
45
45
|
if @metagenotype.nil?
|
46
|
-
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].
|
46
|
+
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].orig_watson)
|
47
47
|
@metagenotype.extend Genotype unless Genotype === @metagenotype
|
48
48
|
end
|
49
49
|
@metagenotype
|
50
50
|
end
|
51
|
-
end unless
|
51
|
+
end unless cohort.respond_to? :metagenotype
|
52
52
|
|
53
|
-
|
53
|
+
cohort.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
cohort.helper :metagenotype do
|
56
|
+
cohort.metagenotype
|
57
57
|
end
|
58
58
|
|
59
|
-
|
60
|
-
|
59
|
+
cohort.helper :samples do
|
60
|
+
cohort
|
61
61
|
end
|
62
62
|
|
63
|
-
NamedArray.setup(
|
63
|
+
NamedArray.setup(cohort, cohort.collect{|genotype| genotype.jobname})
|
64
|
+
end
|
65
|
+
|
66
|
+
def subset(genotypes)
|
67
|
+
new = self.values_at *(genotypes & fields)
|
68
|
+
new.extend Cohort
|
64
69
|
end
|
65
70
|
|
66
71
|
returns "Ensembl Gene ID"
|
@@ -70,12 +75,13 @@ module Genotype
|
|
70
75
|
end
|
71
76
|
|
72
77
|
returns "Ensembl Gene ID"
|
73
|
-
|
78
|
+
input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
|
79
|
+
input :threshold, :float, "from 0 to 1", 0.8
|
80
|
+
task :damaged_genes => :array do |methods, threshold|
|
74
81
|
set_info :organism, metagenotype.organism
|
75
|
-
samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
|
82
|
+
samples.collect{|genotype| genotype.damaged_genes(:methods => methods, :threshold => threshold)}.flatten.uniq
|
76
83
|
end
|
77
84
|
|
78
|
-
|
79
85
|
returns "Ensembl Gene ID"
|
80
86
|
task :recurrent_genes => :array do
|
81
87
|
set_info :organism, metagenotype.organism
|
@@ -86,8 +92,10 @@ module Genotype
|
|
86
92
|
|
87
93
|
%w(damaged_genes recurrent_genes all_affected_genes).each do |name|
|
88
94
|
define_method name do |*args|
|
95
|
+
options = args.first
|
89
96
|
@cache ||= {}
|
90
|
-
|
97
|
+
key = [name, Misc.hash2md5(options || {})]
|
98
|
+
@cache[key] ||= self.job(name, self.jobname, options || {}).run
|
91
99
|
end
|
92
100
|
end
|
93
101
|
|
@@ -119,19 +127,27 @@ module Genotype
|
|
119
127
|
end
|
120
128
|
|
121
129
|
returns "Ensembl Gene ID"
|
122
|
-
|
123
|
-
|
130
|
+
task :with_non_synonymous_mutations => :array do
|
131
|
+
set_info :organism, genotype.organism
|
132
|
+
genotype.mutated_isoforms.flatten.compact.reject{|mutated_isoform| ["SYNONYMOUS", "UTR"].include? mutated_isoform.consequence}.transcript.gene.uniq
|
133
|
+
end
|
134
|
+
|
135
|
+
returns "Ensembl Gene ID"
|
136
|
+
input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
|
137
|
+
input :threshold, :float, "from 0 to 1", 0.8
|
138
|
+
task :with_damaged_isoforms => :array do |methods,threshold|
|
124
139
|
set_info :organism, genotype.organism
|
125
|
-
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
|
140
|
+
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores(methods)}
|
126
141
|
genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
|
127
142
|
end
|
128
143
|
|
129
144
|
returns "Ensembl Gene ID"
|
130
145
|
task :truncated => :array do
|
131
146
|
set_info :organism, genotype.organism
|
132
|
-
MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").
|
133
|
-
|
134
|
-
|
147
|
+
truncated_isoforms = MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").select{|isoform_mutation| isoform_mutation.truncated }
|
148
|
+
proteins = truncated_isoforms.protein
|
149
|
+
genes = proteins.gene
|
150
|
+
genes.to("Ensembl Gene ID").uniq.clean_annotations
|
135
151
|
end
|
136
152
|
|
137
153
|
returns "Ensembl Gene ID"
|
@@ -152,10 +168,12 @@ module Genotype
|
|
152
168
|
(with_damaged_isoforms + truncated + affected_exon_junctions).uniq
|
153
169
|
end
|
154
170
|
|
155
|
-
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
|
171
|
+
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms with_non_synonymous_mutations affected_exon_junctions long_genes recurrent_genes).each do |name|
|
156
172
|
define_method name do |*args|
|
173
|
+
options = args.first
|
157
174
|
@cache ||= {}
|
158
|
-
|
175
|
+
key = [name, Misc.hash2md5(options || {})]
|
176
|
+
@cache[key] ||= self.job(name, self.jobname, options || {}).run
|
159
177
|
end
|
160
178
|
end
|
161
179
|
end
|
data/lib/rbbt/entity/misc.rb
CHANGED
@@ -3,24 +3,3 @@ require 'rbbt/workflow'
|
|
3
3
|
require 'rbbt/sources/go'
|
4
4
|
require 'rbbt/sources/organism'
|
5
5
|
require 'rbbt/entity/gene'
|
6
|
-
|
7
|
-
module GOTerm
|
8
|
-
extend Entity
|
9
|
-
self.annotation :organism
|
10
|
-
|
11
|
-
self.format = ["GO Term", "GO ID"]
|
12
|
-
|
13
|
-
def name
|
14
|
-
if Array === self
|
15
|
-
self.collect{|id| GO.id2name(id)}
|
16
|
-
else
|
17
|
-
GO.id2name(self)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def genes
|
22
|
-
go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
|
23
|
-
go2genes.unnamed = true
|
24
|
-
Gene.setup(go2genes[self].first, "Ensembl Gene ID", organism)
|
25
|
-
end
|
26
|
-
end
|
@@ -4,6 +4,7 @@ require 'rbbt/sources/organism'
|
|
4
4
|
require 'rbbt/mutation/mutation_assessor'
|
5
5
|
require 'rbbt/mutation/sift'
|
6
6
|
require 'rbbt/entity/protein'
|
7
|
+
require 'rbbt/sources/uniprot'
|
7
8
|
require 'rbbt/entity/gene'
|
8
9
|
require 'nokogiri'
|
9
10
|
|
@@ -13,31 +14,41 @@ module MutatedIsoform
|
|
13
14
|
|
14
15
|
self.format = "Mutated Isoform"
|
15
16
|
|
16
|
-
property :protein do
|
17
|
-
if
|
18
|
-
Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
|
19
|
-
else
|
20
|
-
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
21
|
-
end
|
17
|
+
property :protein => :array2single do
|
18
|
+
Protein.setup(self.collect{|mutation| mutation.split(":").first if mutation =~ /^ENSP/}, "Ensembl Protein ID", organism)
|
22
19
|
end
|
20
|
+
persist :protein
|
23
21
|
|
24
|
-
property :
|
25
|
-
|
22
|
+
property :transcript => :array2single do
|
23
|
+
begin
|
24
|
+
protein = self.protein
|
25
|
+
Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
|
26
|
+
end
|
26
27
|
end
|
28
|
+
persist :transcript
|
27
29
|
|
28
|
-
property :
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
property :change => :array2single do
|
31
|
+
self.collect{|mi| mi.split(":").last}
|
32
|
+
end
|
33
|
+
persist :change
|
34
|
+
|
35
|
+
property :position => :array2single do
|
36
|
+
change.collect{|c|
|
37
|
+
if c.match(/[^\d](\d+)[^\d]/)
|
38
|
+
$1.to_i
|
39
|
+
else
|
40
|
+
nil
|
41
|
+
end
|
42
|
+
}
|
34
43
|
end
|
35
|
-
|
44
|
+
persist :position
|
45
|
+
|
36
46
|
property :ensembl_protein_image_url => :single2array do
|
37
47
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
38
48
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
39
49
|
end
|
40
|
-
|
50
|
+
persist :ensembl_protein_image_url
|
51
|
+
|
41
52
|
property :marked_svg => :single2array do
|
42
53
|
svg = Open.read(protein.ensembl_protein_image_url)
|
43
54
|
seq_len = protein.sequence_length
|
@@ -56,10 +67,11 @@ module MutatedIsoform
|
|
56
67
|
svg
|
57
68
|
end
|
58
69
|
end
|
70
|
+
persist :marked_svg
|
59
71
|
|
60
72
|
ASTERISK = "*"[0]
|
61
73
|
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
62
|
-
property :
|
74
|
+
property :consequence => :single2array do
|
63
75
|
prot, change = self.split(":")
|
64
76
|
|
65
77
|
case
|
@@ -79,14 +91,15 @@ module MutatedIsoform
|
|
79
91
|
"MISS-SENSE"
|
80
92
|
end
|
81
93
|
end
|
94
|
+
persist :consequence
|
82
95
|
|
83
96
|
property :truncated => :array2single do
|
84
|
-
|
85
|
-
|
86
|
-
|
97
|
+
begin
|
98
|
+
protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
|
99
|
+
self.collect do |isoform_mutation|
|
87
100
|
|
88
|
-
next if isoform_mutation.
|
89
|
-
protein
|
101
|
+
next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
|
102
|
+
protein = isoform_mutation.protein
|
90
103
|
position = isoform_mutation.position
|
91
104
|
sequence_length = protein2sequence_length[protein]
|
92
105
|
|
@@ -100,80 +113,124 @@ module MutatedIsoform
|
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|
103
|
-
|
104
116
|
end
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
+
persist :truncated
|
118
|
+
|
119
|
+
property :damage_scores => :array2single do |*args|
|
120
|
+
begin
|
121
|
+
methods = args.first
|
122
|
+
methods = [:sift, :mutation_assessor] if methods.nil?
|
123
|
+
methods = [methods] unless Array === methods
|
124
|
+
values = methods.collect{|method|
|
125
|
+
case method.to_sym
|
126
|
+
when :sift
|
127
|
+
sift_scores
|
128
|
+
when :mutation_assessor
|
129
|
+
mutation_assessor_scores
|
130
|
+
else
|
131
|
+
raise "Unknown predictive method: #{ method }"
|
132
|
+
end
|
133
|
+
}
|
134
|
+
if values.compact.empty?
|
135
|
+
return [nil] * self.length
|
136
|
+
else
|
137
|
+
scores = values.shift
|
138
|
+
scores = scores.zip(*values)
|
139
|
+
|
140
|
+
scores.collect{|p|
|
141
|
+
p = p.compact
|
142
|
+
if p.empty?
|
143
|
+
nil
|
144
|
+
else
|
145
|
+
p.inject(0.0){|acc, e| acc += e} / p.length
|
146
|
+
end
|
147
|
+
}
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
persist :damage_scores
|
152
|
+
|
153
|
+
property :damaged? => :array2single do |*args|
|
154
|
+
begin
|
155
|
+
methods, threshold = args
|
156
|
+
threshold = 0.8 if threshold.nil?
|
157
|
+
damage_scores = self.damage_scores(methods)
|
158
|
+
truncated = self.truncated
|
159
|
+
damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
|
160
|
+
end
|
117
161
|
end
|
162
|
+
persist :damaged?
|
118
163
|
|
119
164
|
property :sift_scores => :array2single do
|
120
|
-
|
121
|
-
|
165
|
+
begin
|
166
|
+
missense = self.select{|iso_mut| iso_mut.consequence == "MISS-SENSE"}
|
122
167
|
|
123
|
-
|
124
|
-
|
125
|
-
|
168
|
+
values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
|
169
|
+
v.nil? ? nil : 1.0 - v["Score 1"].to_f
|
170
|
+
}
|
126
171
|
|
127
|
-
|
128
|
-
"" => nil,
|
129
|
-
"TOLERATED" => 0,
|
130
|
-
"*DAMAGING" => 1,
|
131
|
-
"DAMAGING" => 1}
|
172
|
+
values
|
132
173
|
|
133
|
-
|
134
|
-
|
174
|
+
#range = {nil => nil,
|
175
|
+
# "" => nil,
|
176
|
+
# "TOLERATED" => 0,
|
177
|
+
# "*DAMAGING" => 1,
|
178
|
+
# "DAMAGING" => 1}
|
179
|
+
|
180
|
+
#range.values_at *values
|
181
|
+
end
|
135
182
|
end
|
183
|
+
persist :sift_scores
|
136
184
|
|
137
185
|
property :mutation_assessor_scores => :array2single do
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
186
|
+
begin
|
187
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
188
|
+
|
189
|
+
correspondance = {}
|
190
|
+
list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
191
|
+
prot, change = mutation.split(":")
|
192
|
+
next if uniprot.nil?
|
193
|
+
uniprot_change = [uniprot.upcase, change.upcase]
|
194
|
+
correspondance[uniprot_change] ||= []
|
195
|
+
correspondance[uniprot_change] << mutation
|
196
|
+
uniprot_change
|
197
|
+
end.compact
|
198
|
+
|
199
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
|
200
|
+
return [nil] * self.length if list.empty?
|
201
|
+
|
202
|
+
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
203
|
+
|
204
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
|
205
|
+
return [nil] * self.length if tsv.empty?
|
206
|
+
|
207
|
+
new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
|
208
|
+
|
209
|
+
tsv.each do |key, values|
|
210
|
+
uniprot, change = key.split(" ")
|
211
|
+
uniprot_change = [uniprot.upcase, change.upcase]
|
212
|
+
correspondance[uniprot_change].each do |mutation|
|
213
|
+
new[mutation] = values["Func. Impact"]
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
|
218
|
+
range = {nil => nil,
|
219
|
+
"" => nil,
|
220
|
+
"neutral" => 0,
|
221
|
+
"low" => 0.5,
|
222
|
+
"medium" => 0.7,
|
223
|
+
"high" => 1.0}
|
224
|
+
|
225
|
+
range.values_at *new.values_at(*self)
|
226
|
+
end
|
177
227
|
end
|
228
|
+
persist :mutation_assessor_scores
|
178
229
|
|
230
|
+
property :pdbs => :single do
|
231
|
+
uniprot = self.transcript.protein.uniprot
|
232
|
+
next if uniprot.nil?
|
233
|
+
Uniprot.pdbs_covering_aa_position(uniprot, self.position)
|
234
|
+
end
|
235
|
+
persist :pdbs
|
179
236
|
end
|
data/lib/rbbt/entity/pmid.rb
CHANGED
@@ -6,14 +6,24 @@ module PMID
|
|
6
6
|
|
7
7
|
self.format = "PMID"
|
8
8
|
|
9
|
+
property :article => :array2single do
|
10
|
+
PubMed.get_article(self).values_at(*self)
|
11
|
+
end
|
12
|
+
persist :article
|
13
|
+
|
9
14
|
property :title => :array2single do
|
10
|
-
|
11
|
-
|
12
|
-
|
15
|
+
article.collect{|a| a.nil? ? nil : a.title}
|
16
|
+
end
|
17
|
+
persist :title
|
18
|
+
|
19
|
+
property :text => :array2single do
|
20
|
+
article.collect{|a| a.nil? ? nil : a.text}
|
13
21
|
end
|
22
|
+
persist :text
|
14
23
|
|
15
24
|
property :pubmed_url => :single2array do
|
16
25
|
"<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
|
17
26
|
end
|
27
|
+
persist :pubmed_url
|
18
28
|
end
|
19
29
|
|