rbbt-entities 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/entity.rb +36 -29
- data/lib/rbbt/entity/gene.rb +141 -65
- data/lib/rbbt/entity/genomic_mutation.rb +138 -41
- data/lib/rbbt/entity/genotype.rb +41 -23
- data/lib/rbbt/entity/misc.rb +0 -21
- data/lib/rbbt/entity/mutated_isoform.rb +143 -86
- data/lib/rbbt/entity/pmid.rb +13 -3
- data/lib/rbbt/entity/protein.rb +39 -7
- data/lib/rbbt/entity/transcript.rb +69 -0
- data/test/rbbt/entity/test_gene.rb +1 -1
- data/test/rbbt/entity/test_genomic_mutation.rb +0 -10
- data/test/rbbt/test_entity.rb +101 -0
- metadata +5 -4
data/lib/rbbt/entity/genotype.rb
CHANGED
@@ -27,10 +27,10 @@ module Genotype
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def self.extended(
|
31
|
-
prev_genotype_cohort_extended(
|
30
|
+
def self.extended(cohort)
|
31
|
+
prev_genotype_cohort_extended(cohort) if self.respond_to? :prev_genotype_cohort_extended
|
32
32
|
|
33
|
-
class <<
|
33
|
+
class << cohort
|
34
34
|
attr_accessor :metagenotype
|
35
35
|
|
36
36
|
def jobname
|
@@ -43,24 +43,29 @@ module Genotype
|
|
43
43
|
|
44
44
|
def metagenotype
|
45
45
|
if @metagenotype.nil?
|
46
|
-
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].
|
46
|
+
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].orig_watson)
|
47
47
|
@metagenotype.extend Genotype unless Genotype === @metagenotype
|
48
48
|
end
|
49
49
|
@metagenotype
|
50
50
|
end
|
51
|
-
end unless
|
51
|
+
end unless cohort.respond_to? :metagenotype
|
52
52
|
|
53
|
-
|
53
|
+
cohort.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
cohort.helper :metagenotype do
|
56
|
+
cohort.metagenotype
|
57
57
|
end
|
58
58
|
|
59
|
-
|
60
|
-
|
59
|
+
cohort.helper :samples do
|
60
|
+
cohort
|
61
61
|
end
|
62
62
|
|
63
|
-
NamedArray.setup(
|
63
|
+
NamedArray.setup(cohort, cohort.collect{|genotype| genotype.jobname})
|
64
|
+
end
|
65
|
+
|
66
|
+
def subset(genotypes)
|
67
|
+
new = self.values_at *(genotypes & fields)
|
68
|
+
new.extend Cohort
|
64
69
|
end
|
65
70
|
|
66
71
|
returns "Ensembl Gene ID"
|
@@ -70,12 +75,13 @@ module Genotype
|
|
70
75
|
end
|
71
76
|
|
72
77
|
returns "Ensembl Gene ID"
|
73
|
-
|
78
|
+
input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
|
79
|
+
input :threshold, :float, "from 0 to 1", 0.8
|
80
|
+
task :damaged_genes => :array do |methods, threshold|
|
74
81
|
set_info :organism, metagenotype.organism
|
75
|
-
samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
|
82
|
+
samples.collect{|genotype| genotype.damaged_genes(:methods => methods, :threshold => threshold)}.flatten.uniq
|
76
83
|
end
|
77
84
|
|
78
|
-
|
79
85
|
returns "Ensembl Gene ID"
|
80
86
|
task :recurrent_genes => :array do
|
81
87
|
set_info :organism, metagenotype.organism
|
@@ -86,8 +92,10 @@ module Genotype
|
|
86
92
|
|
87
93
|
%w(damaged_genes recurrent_genes all_affected_genes).each do |name|
|
88
94
|
define_method name do |*args|
|
95
|
+
options = args.first
|
89
96
|
@cache ||= {}
|
90
|
-
|
97
|
+
key = [name, Misc.hash2md5(options || {})]
|
98
|
+
@cache[key] ||= self.job(name, self.jobname, options || {}).run
|
91
99
|
end
|
92
100
|
end
|
93
101
|
|
@@ -119,19 +127,27 @@ module Genotype
|
|
119
127
|
end
|
120
128
|
|
121
129
|
returns "Ensembl Gene ID"
|
122
|
-
|
123
|
-
|
130
|
+
task :with_non_synonymous_mutations => :array do
|
131
|
+
set_info :organism, genotype.organism
|
132
|
+
genotype.mutated_isoforms.flatten.compact.reject{|mutated_isoform| ["SYNONYMOUS", "UTR"].include? mutated_isoform.consequence}.transcript.gene.uniq
|
133
|
+
end
|
134
|
+
|
135
|
+
returns "Ensembl Gene ID"
|
136
|
+
input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
|
137
|
+
input :threshold, :float, "from 0 to 1", 0.8
|
138
|
+
task :with_damaged_isoforms => :array do |methods,threshold|
|
124
139
|
set_info :organism, genotype.organism
|
125
|
-
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
|
140
|
+
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores(methods)}
|
126
141
|
genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
|
127
142
|
end
|
128
143
|
|
129
144
|
returns "Ensembl Gene ID"
|
130
145
|
task :truncated => :array do
|
131
146
|
set_info :organism, genotype.organism
|
132
|
-
MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").
|
133
|
-
|
134
|
-
|
147
|
+
truncated_isoforms = MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").select{|isoform_mutation| isoform_mutation.truncated }
|
148
|
+
proteins = truncated_isoforms.protein
|
149
|
+
genes = proteins.gene
|
150
|
+
genes.to("Ensembl Gene ID").uniq.clean_annotations
|
135
151
|
end
|
136
152
|
|
137
153
|
returns "Ensembl Gene ID"
|
@@ -152,10 +168,12 @@ module Genotype
|
|
152
168
|
(with_damaged_isoforms + truncated + affected_exon_junctions).uniq
|
153
169
|
end
|
154
170
|
|
155
|
-
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
|
171
|
+
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms with_non_synonymous_mutations affected_exon_junctions long_genes recurrent_genes).each do |name|
|
156
172
|
define_method name do |*args|
|
173
|
+
options = args.first
|
157
174
|
@cache ||= {}
|
158
|
-
|
175
|
+
key = [name, Misc.hash2md5(options || {})]
|
176
|
+
@cache[key] ||= self.job(name, self.jobname, options || {}).run
|
159
177
|
end
|
160
178
|
end
|
161
179
|
end
|
data/lib/rbbt/entity/misc.rb
CHANGED
@@ -3,24 +3,3 @@ require 'rbbt/workflow'
|
|
3
3
|
require 'rbbt/sources/go'
|
4
4
|
require 'rbbt/sources/organism'
|
5
5
|
require 'rbbt/entity/gene'
|
6
|
-
|
7
|
-
module GOTerm
|
8
|
-
extend Entity
|
9
|
-
self.annotation :organism
|
10
|
-
|
11
|
-
self.format = ["GO Term", "GO ID"]
|
12
|
-
|
13
|
-
def name
|
14
|
-
if Array === self
|
15
|
-
self.collect{|id| GO.id2name(id)}
|
16
|
-
else
|
17
|
-
GO.id2name(self)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def genes
|
22
|
-
go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
|
23
|
-
go2genes.unnamed = true
|
24
|
-
Gene.setup(go2genes[self].first, "Ensembl Gene ID", organism)
|
25
|
-
end
|
26
|
-
end
|
data/lib/rbbt/entity/mutated_isoform.rb
CHANGED
@@ -4,6 +4,7 @@ require 'rbbt/sources/organism'
|
|
4
4
|
require 'rbbt/mutation/mutation_assessor'
|
5
5
|
require 'rbbt/mutation/sift'
|
6
6
|
require 'rbbt/entity/protein'
|
7
|
+
require 'rbbt/sources/uniprot'
|
7
8
|
require 'rbbt/entity/gene'
|
8
9
|
require 'nokogiri'
|
9
10
|
|
@@ -13,31 +14,41 @@ module MutatedIsoform
|
|
13
14
|
|
14
15
|
self.format = "Mutated Isoform"
|
15
16
|
|
16
|
-
property :protein do
|
17
|
-
if
|
18
|
-
Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
|
19
|
-
else
|
20
|
-
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
21
|
-
end
|
17
|
+
property :protein => :array2single do
|
18
|
+
Protein.setup(self.collect{|mutation| mutation.split(":").first if mutation =~ /^ENSP/}, "Ensembl Protein ID", organism)
|
22
19
|
end
|
20
|
+
persist :protein
|
23
21
|
|
24
|
-
property :
|
25
|
-
|
22
|
+
property :transcript => :array2single do
|
23
|
+
begin
|
24
|
+
protein = self.protein
|
25
|
+
Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
|
26
|
+
end
|
26
27
|
end
|
28
|
+
persist :transcript
|
27
29
|
|
28
|
-
property :
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
property :change => :array2single do
|
31
|
+
self.collect{|mi| mi.split(":").last}
|
32
|
+
end
|
33
|
+
persist :change
|
34
|
+
|
35
|
+
property :position => :array2single do
|
36
|
+
change.collect{|c|
|
37
|
+
if c.match(/[^\d](\d+)[^\d]/)
|
38
|
+
$1.to_i
|
39
|
+
else
|
40
|
+
nil
|
41
|
+
end
|
42
|
+
}
|
34
43
|
end
|
35
|
-
|
44
|
+
persist :position
|
45
|
+
|
36
46
|
property :ensembl_protein_image_url => :single2array do
|
37
47
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
38
48
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
39
49
|
end
|
40
|
-
|
50
|
+
persist :ensembl_protein_image_url
|
51
|
+
|
41
52
|
property :marked_svg => :single2array do
|
42
53
|
svg = Open.read(protein.ensembl_protein_image_url)
|
43
54
|
seq_len = protein.sequence_length
|
@@ -56,10 +67,11 @@ module MutatedIsoform
|
|
56
67
|
svg
|
57
68
|
end
|
58
69
|
end
|
70
|
+
persist :marked_svg
|
59
71
|
|
60
72
|
ASTERISK = "*"[0]
|
61
73
|
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
62
|
-
property :
|
74
|
+
property :consequence => :single2array do
|
63
75
|
prot, change = self.split(":")
|
64
76
|
|
65
77
|
case
|
@@ -79,14 +91,15 @@ module MutatedIsoform
|
|
79
91
|
"MISS-SENSE"
|
80
92
|
end
|
81
93
|
end
|
94
|
+
persist :consequence
|
82
95
|
|
83
96
|
property :truncated => :array2single do
|
84
|
-
|
85
|
-
|
86
|
-
|
97
|
+
begin
|
98
|
+
protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
|
99
|
+
self.collect do |isoform_mutation|
|
87
100
|
|
88
|
-
next if isoform_mutation.
|
89
|
-
protein
|
101
|
+
next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
|
102
|
+
protein = isoform_mutation.protein
|
90
103
|
position = isoform_mutation.position
|
91
104
|
sequence_length = protein2sequence_length[protein]
|
92
105
|
|
@@ -100,80 +113,124 @@ module MutatedIsoform
|
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|
103
|
-
|
104
116
|
end
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
+
persist :truncated
|
118
|
+
|
119
|
+
property :damage_scores => :array2single do |*args|
|
120
|
+
begin
|
121
|
+
methods = args.first
|
122
|
+
methods = [:sift, :mutation_assessor] if methods.nil?
|
123
|
+
methods = [methods] unless Array === methods
|
124
|
+
values = methods.collect{|method|
|
125
|
+
case method.to_sym
|
126
|
+
when :sift
|
127
|
+
sift_scores
|
128
|
+
when :mutation_assessor
|
129
|
+
mutation_assessor_scores
|
130
|
+
else
|
131
|
+
raise "Unknown predictive method: #{ method }"
|
132
|
+
end
|
133
|
+
}
|
134
|
+
if values.compact.empty?
|
135
|
+
return [nil] * self.length
|
136
|
+
else
|
137
|
+
scores = values.shift
|
138
|
+
scores = scores.zip(*values)
|
139
|
+
|
140
|
+
scores.collect{|p|
|
141
|
+
p = p.compact
|
142
|
+
if p.empty?
|
143
|
+
nil
|
144
|
+
else
|
145
|
+
p.inject(0.0){|acc, e| acc += e} / p.length
|
146
|
+
end
|
147
|
+
}
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
persist :damage_scores
|
152
|
+
|
153
|
+
property :damaged? => :array2single do |*args|
|
154
|
+
begin
|
155
|
+
methods, threshold = args
|
156
|
+
threshold = 0.8 if threshold.nil?
|
157
|
+
damage_scores = self.damage_scores(methods)
|
158
|
+
truncated = self.truncated
|
159
|
+
damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
|
160
|
+
end
|
117
161
|
end
|
162
|
+
persist :damaged?
|
118
163
|
|
119
164
|
property :sift_scores => :array2single do
|
120
|
-
|
121
|
-
|
165
|
+
begin
|
166
|
+
missense = self.select{|iso_mut| iso_mut.consequence == "MISS-SENSE"}
|
122
167
|
|
123
|
-
|
124
|
-
|
125
|
-
|
168
|
+
values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
|
169
|
+
v.nil? ? nil : 1.0 - v["Score 1"].to_f
|
170
|
+
}
|
126
171
|
|
127
|
-
|
128
|
-
"" => nil,
|
129
|
-
"TOLERATED" => 0,
|
130
|
-
"*DAMAGING" => 1,
|
131
|
-
"DAMAGING" => 1}
|
172
|
+
values
|
132
173
|
|
133
|
-
|
134
|
-
|
174
|
+
#range = {nil => nil,
|
175
|
+
# "" => nil,
|
176
|
+
# "TOLERATED" => 0,
|
177
|
+
# "*DAMAGING" => 1,
|
178
|
+
# "DAMAGING" => 1}
|
179
|
+
|
180
|
+
#range.values_at *values
|
181
|
+
end
|
135
182
|
end
|
183
|
+
persist :sift_scores
|
136
184
|
|
137
185
|
property :mutation_assessor_scores => :array2single do
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
186
|
+
begin
|
187
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
188
|
+
|
189
|
+
correspondance = {}
|
190
|
+
list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
191
|
+
prot, change = mutation.split(":")
|
192
|
+
next if uniprot.nil?
|
193
|
+
uniprot_change = [uniprot.upcase, change.upcase]
|
194
|
+
correspondance[uniprot_change] ||= []
|
195
|
+
correspondance[uniprot_change] << mutation
|
196
|
+
uniprot_change
|
197
|
+
end.compact
|
198
|
+
|
199
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
|
200
|
+
return [nil] * self.length if list.empty?
|
201
|
+
|
202
|
+
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
203
|
+
|
204
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
|
205
|
+
return [nil] * self.length if tsv.empty?
|
206
|
+
|
207
|
+
new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
|
208
|
+
|
209
|
+
tsv.each do |key, values|
|
210
|
+
uniprot, change = key.split(" ")
|
211
|
+
uniprot_change = [uniprot.upcase, change.upcase]
|
212
|
+
correspondance[uniprot_change].each do |mutation|
|
213
|
+
new[mutation] = values["Func. Impact"]
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
|
218
|
+
range = {nil => nil,
|
219
|
+
"" => nil,
|
220
|
+
"neutral" => 0,
|
221
|
+
"low" => 0.5,
|
222
|
+
"medium" => 0.7,
|
223
|
+
"high" => 1.0}
|
224
|
+
|
225
|
+
range.values_at *new.values_at(*self)
|
226
|
+
end
|
177
227
|
end
|
228
|
+
persist :mutation_assessor_scores
|
178
229
|
|
230
|
+
property :pdbs => :single do
|
231
|
+
uniprot = self.transcript.protein.uniprot
|
232
|
+
next if uniprot.nil?
|
233
|
+
Uniprot.pdbs_covering_aa_position(uniprot, self.position)
|
234
|
+
end
|
235
|
+
persist :pdbs
|
179
236
|
end
|
data/lib/rbbt/entity/pmid.rb
CHANGED
@@ -6,14 +6,24 @@ module PMID
|
|
6
6
|
|
7
7
|
self.format = "PMID"
|
8
8
|
|
9
|
+
property :article => :array2single do
|
10
|
+
PubMed.get_article(self).values_at(*self)
|
11
|
+
end
|
12
|
+
persist :article
|
13
|
+
|
9
14
|
property :title => :array2single do
|
10
|
-
|
11
|
-
|
12
|
-
|
15
|
+
article.collect{|a| a.nil? ? nil : a.title}
|
16
|
+
end
|
17
|
+
persist :title
|
18
|
+
|
19
|
+
property :text => :array2single do
|
20
|
+
article.collect{|a| a.nil? ? nil : a.text}
|
13
21
|
end
|
22
|
+
persist :text
|
14
23
|
|
15
24
|
property :pubmed_url => :single2array do
|
16
25
|
"<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
|
17
26
|
end
|
27
|
+
persist :pubmed_url
|
18
28
|
end
|
19
29
|
|