rbbt-entities 1.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/icgc2rbbt.rb +23 -0
- data/bin/vcf2rbbt.rb +15 -0
- data/lib/rbbt/entity/chromosome_range.rb +73 -0
- data/lib/rbbt/entity/cnv.rb +20 -2
- data/lib/rbbt/entity/gene.rb +147 -74
- data/lib/rbbt/entity/genomic_mutation.rb +380 -50
- data/lib/rbbt/entity/genotype.rb +10 -4
- data/lib/rbbt/entity/interactor.rb +6 -0
- data/lib/rbbt/entity/mutated_isoform.rb +171 -83
- data/lib/rbbt/entity/pmid.rb +33 -6
- data/lib/rbbt/entity/protein.rb +36 -7
- data/lib/rbbt/entity/transcript.rb +20 -4
- data/lib/rbbt/entity.rb +123 -68
- data/test/rbbt/entity/test_gene.rb +16 -2
- data/test/rbbt/entity/test_genomic_mutation.rb +53 -1
- data/test/rbbt/entity/test_pmid.rb +19 -0
- data/test/rbbt/test_entity.rb +100 -5
- metadata +51 -72
@@ -1,11 +1,17 @@
|
|
1
|
-
require 'rbbt/entity'
|
2
1
|
require 'rbbt/workflow'
|
3
|
-
|
4
|
-
require 'rbbt/
|
2
|
+
|
3
|
+
require 'rbbt/entity'
|
5
4
|
require 'rbbt/entity/protein'
|
6
5
|
require 'rbbt/entity/gene'
|
7
6
|
require 'rbbt/entity/mutated_isoform'
|
8
7
|
|
8
|
+
require 'rbbt/sources/organism'
|
9
|
+
require 'rbbt/sources/genomes1000'
|
10
|
+
require 'rbbt/sources/COSMIC'
|
11
|
+
require 'rbbt/sources/dbSNP'
|
12
|
+
|
13
|
+
require 'rbbt/mutation/mutation_assessor'
|
14
|
+
|
9
15
|
Workflow.require_workflow "Sequence"
|
10
16
|
|
11
17
|
module GenomicMutation
|
@@ -14,22 +20,25 @@ module GenomicMutation
|
|
14
20
|
self.annotation :organism
|
15
21
|
self.annotation :watson
|
16
22
|
|
23
|
+
self.masked_annotations = [:jobname]
|
24
|
+
|
17
25
|
self.format = "Genomic Mutation"
|
18
26
|
|
19
27
|
property :guess_watson => :array do
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
28
|
+
if Array === self
|
29
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
|
30
|
+
else
|
31
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => [self.clean_annotations], :organism => organism).run
|
32
|
+
end
|
25
33
|
end
|
26
|
-
persist :guess_watson
|
34
|
+
#persist :guess_watson
|
27
35
|
|
28
36
|
def watson
|
29
|
-
if @watson.nil?
|
37
|
+
if @watson.nil? and Array === self
|
30
38
|
@watson = :missing
|
31
39
|
@watson = guess_watson
|
32
40
|
end
|
41
|
+
@watson = false if @watson == "false"
|
33
42
|
@watson
|
34
43
|
end
|
35
44
|
|
@@ -37,45 +46,168 @@ module GenomicMutation
|
|
37
46
|
@watson
|
38
47
|
end
|
39
48
|
|
49
|
+
def self.exon_rank_index(organism)
|
50
|
+
@@exon_rank_indices ||= {}
|
51
|
+
@@exon_rank_indices[organism] ||= Organism.transcript_exons(organism).tsv :persist => true, :type => :double, :unnamed => true
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.exon_position_index(organism)
|
55
|
+
@@exon_position_indices ||= {}
|
56
|
+
@@exon_position_indices[organism] ||= Organism.exons(organism).tsv :persist => true, :type => :list, :cast => :to_i, :fields => ["Exon Strand", "Exon Chr Start", "Exon Chr End"], :unnamed => true
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.transcripts_for_exon_index(organism)
|
60
|
+
@@transcript_for_exon_indices ||= {}
|
61
|
+
@@transcript_for_exon_indices[organism] ||= Organism.transcript_exons(organism).tsv :persist => true, :type => :flat, :key_field => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :unnamed => true
|
62
|
+
end
|
63
|
+
|
64
|
+
def self.genomes_1000_index(organism)
|
65
|
+
build = Organism.hg_build(organism)
|
66
|
+
@@genomes_1000_index ||= {}
|
67
|
+
@@genomes_1000_index[build] ||= Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"].tsv :key_field => "Genomic Mutation", :unnamed => true, :fields => ["Variant ID"], :type => :single, :persist => true
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.COSMIC_index(organism)
|
71
|
+
build = Organism.hg_build(organism)
|
72
|
+
field = {
|
73
|
+
"hg19" => "Mutation GRCh37 genome position",
|
74
|
+
"hg18" => "Mutation NCBI36 genome position",
|
75
|
+
|
76
|
+
}[build]
|
77
|
+
@@COSMIC_index ||= {}
|
78
|
+
@@COSMIC_index[build] ||= COSMIC.Mutations.tsv :key_field => field, :unnamed => true, :fields => ["Mutation ID"], :type => :single, :persist => true
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.dbSNP_index(organism)
|
82
|
+
build = Organism.hg_build(organism)
|
83
|
+
@@dbSNP_index ||= {}
|
84
|
+
@@dbSNP_index[build] ||= DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"].tsv :key_field => "Genomic Mutation", :unnamed => true, :type => :single, :persist => true
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.dbSNP_position_index(organism)
|
88
|
+
build = Organism.hg_build(organism)
|
89
|
+
|
90
|
+
@@dbSNP_position_index ||= {}
|
91
|
+
|
92
|
+
@@dbSNP_position_index[build] ||= TSV.open(
|
93
|
+
CMD::cmd('sed "s/\([[:alnum:]]\+\):\([[:digit:]]\+\):\([ACTG+-]\+\)/\1:\2/" ', :in => DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"].open, :pipe => true),
|
94
|
+
:key_field => "Genomic Mutation", :unnamed => true, :type => :single, :persist => true)
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
property :bases_in_range => :single2array do |range|
|
99
|
+
start = range.begin+position-1
|
100
|
+
eend = range.end - range.begin + 1
|
101
|
+
File.open(Organism[organism]["chromosome_#{chromosome}"].find) do |f|
|
102
|
+
f.seek start
|
103
|
+
f.read eend
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
property :dbSNP_position => :array2single do
|
108
|
+
index ||= GenomicMutation.dbSNP_position_index(organism)
|
109
|
+
index.chunked_values_at self.collect{|m| m.split(":")[0..1] * ":" }
|
110
|
+
end
|
111
|
+
|
112
|
+
|
113
|
+
property :dbSNP => :array2single do
|
114
|
+
index ||= GenomicMutation.dbSNP_index(organism)
|
115
|
+
index.chunked_values_at self.collect{|m| m.split(":")[0..2] * ":" }
|
116
|
+
end
|
117
|
+
|
118
|
+
property :genomes_1000 => :array2single do
|
119
|
+
index ||= GenomicMutation.genomes_1000_index(organism)
|
120
|
+
index.chunked_values_at self.collect{|m| m.split(":")[0..2] * ":" }
|
121
|
+
end
|
122
|
+
|
123
|
+
property :COSMIC => :array2single do
|
124
|
+
index ||= GenomicMutation.COSMIC_index(organism)
|
125
|
+
index.chunked_values_at self.collect{|m| m.split(":").values_at(0,1,1) * ":" }
|
126
|
+
end
|
127
|
+
|
40
128
|
property :ensembl_browser => :single2array do
|
41
129
|
"http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{position - 100}-#{position + 100}"
|
42
130
|
end
|
43
|
-
persist :ensembl_browser
|
131
|
+
#persist :ensembl_browser
|
44
132
|
|
45
133
|
property :chromosome => :array2single do
|
46
134
|
self.clean_annotations.collect{|mut| mut.split(":")[0]}
|
47
135
|
end
|
48
|
-
persist :
|
136
|
+
#persist :_ary_chromosome
|
49
137
|
|
50
138
|
property :position => :array2single do
|
51
139
|
self.clean_annotations.collect{|mut| mut.split(":")[1].to_i}
|
52
140
|
end
|
53
|
-
persist :
|
141
|
+
#persist :_ary_position
|
54
142
|
|
55
143
|
property :base => :array2single do
|
56
144
|
self.clean_annotations.collect{|mut| mut.split(":")[2]}
|
57
145
|
end
|
58
|
-
persist :
|
146
|
+
#persist :_ary_base
|
59
147
|
|
60
148
|
property :reference => :array2single do
|
61
149
|
Sequence.reference_allele_at_chr_positions(organism, chromosome, position)
|
62
150
|
end
|
63
|
-
persist :
|
151
|
+
#persist :_ary_reference
|
152
|
+
|
153
|
+
property :gene_strand_reference => :array2single do
|
154
|
+
genes = self.genes
|
155
|
+
gene_strand = Misc.process_to_hash(genes.compact.flatten){|list| list.any? ? list.strand : []}
|
156
|
+
reverse = genes.collect{|list| not list.nil? and list.select{|gene| gene_strand[gene].to_s == "-1" }.any? }
|
157
|
+
forward = genes.collect{|list| not list.nil? and list.select{|gene| gene_strand[gene].to_s == "1" }.any? }
|
158
|
+
reference.zip(reverse, forward, base).collect{|reference,reverse, forward, base|
|
159
|
+
case
|
160
|
+
when (reverse and not forward)
|
161
|
+
Misc::BASE2COMPLEMENT[reference]
|
162
|
+
when (forward and not reverse)
|
163
|
+
reference
|
164
|
+
else
|
165
|
+
base == reference ? Misc::BASE2COMPLEMENT[reference] : reference
|
166
|
+
end
|
167
|
+
}
|
168
|
+
end
|
169
|
+
#persist :_ary_gene_strand_reference
|
170
|
+
|
171
|
+
# DID NOT TRY THIS. Its supposed to deal with ambiguous gene overlaps by
|
172
|
+
# taking the first coding gene, if any
|
173
|
+
#property :gene_strand_reference => :array2single do
|
174
|
+
# genes = self.genes
|
175
|
+
# gene_strand = Misc.process_to_hash(genes.compact.flatten){|list| list.strand }
|
176
|
+
# gene_biotype = Misc.process_to_hash(genes.compact.flatten){|list| list.biotype }
|
177
|
+
# reference.zip(genes).collect{|reference,genes|
|
178
|
+
# case
|
179
|
+
# when (genes.nil? or genes.empty?)
|
180
|
+
# reference
|
181
|
+
# when genes.length == 1
|
182
|
+
# gene_strand[genes[0]] == "-1" ? Misc::BASE2COMPLEMENT[reference] : reference
|
183
|
+
# else
|
184
|
+
# coding_genes = genes.zip(gene_strand.values_at(*genes)).select{|gene,strand| gene_biotype[gene] == "protein_coding"}
|
185
|
+
|
186
|
+
# if coding_genes.empty?
|
187
|
+
# reference
|
188
|
+
# else
|
189
|
+
# coding_genes[0][1] == "-1" ? Misc::BASE2COMPLEMENT[reference] : reference
|
190
|
+
# end
|
191
|
+
# end
|
192
|
+
# }
|
193
|
+
#end
|
194
|
+
##persist :_ary_gene_strand_reference
|
195
|
+
|
64
196
|
|
65
197
|
property :score => :array2single do
|
66
198
|
self.clean_annotations.collect{|mut| mut.split(":")[3].to_f}
|
67
199
|
end
|
68
|
-
persist :
|
200
|
+
#persist :_ary_score
|
69
201
|
|
70
202
|
property :remove_score => :array2single do
|
71
203
|
self.annotate(self.collect{|mut| mut.split(":")[0..2] * ":"})
|
72
204
|
end
|
73
|
-
persist :remove_score
|
205
|
+
#persist :remove_score
|
74
206
|
|
75
207
|
property :noscore => :single2array do
|
76
208
|
self.annotate self.clean_annotations.collect{|mut| mut.split(":")[0..2]}
|
77
209
|
end
|
78
|
-
persist :noscore
|
210
|
+
#persist :noscore
|
79
211
|
|
80
212
|
property :to_watson => :array2single do
|
81
213
|
if watson
|
@@ -86,15 +218,19 @@ module GenomicMutation
|
|
86
218
|
result
|
87
219
|
end
|
88
220
|
end
|
89
|
-
persist :to_watson
|
221
|
+
#persist :to_watson
|
90
222
|
|
91
223
|
property :reference => :array2single do
|
92
|
-
Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism).run
|
224
|
+
tsv = Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism, :unnamed => true).run
|
225
|
+
tsv.chunked_values_at self
|
93
226
|
end
|
94
|
-
persist :reference
|
227
|
+
#persist :reference
|
95
228
|
|
96
229
|
property :type => :array2single do
|
230
|
+
reference = watson ? self.reference : self.gene_strand_reference
|
231
|
+
|
97
232
|
self.base.zip(reference).collect do |base,reference|
|
233
|
+
|
98
234
|
type = case
|
99
235
|
when base == reference
|
100
236
|
"none"
|
@@ -103,22 +239,23 @@ module GenomicMutation
|
|
103
239
|
when (base.length > 1 or base == '-')
|
104
240
|
"indel"
|
105
241
|
when (not %w(A G T C).include? base and not %w(A G T C).include? reference)
|
106
|
-
|
107
|
-
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["
|
108
|
-
"transition"
|
109
|
-
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
110
|
-
"transition"
|
111
|
-
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
242
|
+
"unknown"
|
243
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
112
244
|
"transversion"
|
113
|
-
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and
|
245
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
114
246
|
"transversion"
|
247
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || [nil]) & ["T", "C", nil]).empty?)
|
248
|
+
"transition"
|
249
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || [nil]) & ["A", "G", nil]).empty?)
|
250
|
+
"transition"
|
115
251
|
else
|
116
|
-
"unknown
|
252
|
+
"unknown"
|
117
253
|
end
|
118
254
|
type
|
119
255
|
end
|
256
|
+
|
120
257
|
end
|
121
|
-
persist :type
|
258
|
+
#persist :type
|
122
259
|
|
123
260
|
property :offset_in_genes => :array2single do
|
124
261
|
gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
|
@@ -129,38 +266,87 @@ module GenomicMutation
|
|
129
266
|
}.compact
|
130
267
|
}
|
131
268
|
end
|
132
|
-
persist :offset_in_genes
|
269
|
+
#persist :offset_in_genes
|
133
270
|
|
134
271
|
property :genes => :array2single do
|
135
|
-
|
136
|
-
|
137
|
-
genes =
|
272
|
+
genes_tsv = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run
|
273
|
+
genes_tsv.unnamed = true
|
274
|
+
genes = nil
|
275
|
+
genes = genes_tsv.chunked_values_at self
|
138
276
|
Gene.setup(genes, "Ensembl Gene ID", organism)
|
139
277
|
end
|
140
|
-
persist :
|
278
|
+
#persist :_ary_genes
|
279
|
+
|
280
|
+
property :affected_genes => :array2single do
|
281
|
+
_mutated_isoforms = mutated_isoforms
|
282
|
+
mi_gene = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.protein.gene}
|
283
|
+
from_protein = mutated_isoforms.collect{|mis|
|
284
|
+
genes = mis.nil? ? [] : mi_gene.values_at(*mis).compact
|
285
|
+
Gene.setup(genes.uniq, "Ensembl Gene ID", organism)
|
286
|
+
}
|
287
|
+
is_exon_junction = self.in_exon_junction?.zip(self.type).collect{|in_ex,type| in_ex and type != "none"}
|
288
|
+
genes_with_altered_splicing = self.transcripts_with_affected_splicing.collect{|transcripts| transcripts.gene}
|
289
|
+
from_protein.each_with_index do |list, i|
|
290
|
+
if is_exon_junction[i] and genes_with_altered_splicing[i]
|
291
|
+
list.concat genes_with_altered_splicing[i]
|
292
|
+
list.uniq!
|
293
|
+
end
|
294
|
+
end
|
295
|
+
Gene.setup(from_protein, "Ensembl Gene ID", organism)
|
296
|
+
end
|
297
|
+
#persist :_ary_affected_genes
|
298
|
+
|
299
|
+
|
300
|
+
property :relevant? => :array2single do
|
301
|
+
affected_genes.collect{|list| list and list.any?}
|
302
|
+
end
|
303
|
+
|
304
|
+
property :damaged_genes => :array2single do |*args|
|
305
|
+
_mutated_isoforms = mutated_isoforms
|
306
|
+
mi_damaged = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.damaged?(*args)}
|
307
|
+
mi_gene = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.protein.gene}
|
308
|
+
from_protein = _mutated_isoforms.collect{|mis|
|
309
|
+
genes = mis.nil? ? [] : mi_gene.values_at(*mis.select{|mi| mi_damaged[mi]}).compact
|
310
|
+
Gene.setup(genes.uniq, "Ensembl Gene ID", organism)
|
311
|
+
}
|
312
|
+
|
313
|
+
ej_transcripts = transcripts_with_affected_splicing
|
314
|
+
_type = self.type
|
315
|
+
|
316
|
+
from_protein.each_with_index do |list, i|
|
317
|
+
if ej_transcripts[i] and ej_transcripts[i].any? and _type[i] != 'none'
|
318
|
+
list.concat ej_transcripts[i].gene
|
319
|
+
list.uniq!
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
Gene.setup(from_protein, "Ensembl Gene ID", organism)
|
324
|
+
end
|
325
|
+
#persist :_ary_damaged_genes
|
141
326
|
|
142
327
|
property :mutated_isoforms => :array2single do
|
143
|
-
res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.
|
328
|
+
res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.chunked_values_at self
|
144
329
|
res.each{|list| list.organism = organism unless list.nil?}
|
145
330
|
res.compact[0].annotate res if res.compact[0].respond_to? :annotate
|
146
331
|
res
|
147
332
|
end
|
148
|
-
persist :
|
333
|
+
#persist :_ary_mutated_isoforms
|
149
334
|
|
150
335
|
property :exon_junctions => :array do
|
151
|
-
Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.
|
336
|
+
Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.chunked_values_at(self)
|
152
337
|
end
|
153
|
-
persist :exon_junctions
|
338
|
+
#persist :exon_junctions
|
339
|
+
#persist :_ary_in_exon_junction?
|
154
340
|
|
155
|
-
property :
|
156
|
-
|
341
|
+
property :over_range? => :array2single do |range|
|
342
|
+
chromosome.zip(position).collect{|chr,pos| chr == gene_chromosome and range.include? pos}
|
157
343
|
end
|
158
|
-
persist :
|
344
|
+
#persist :_ary_over_range?
|
159
345
|
|
160
346
|
property :over_gene? => :array2single do |gene|
|
161
347
|
gene = Gene.setup(gene.dup, "Ensembl Gene ID", organism) unless Gene === gene
|
162
348
|
|
163
|
-
gene_range = gene.
|
349
|
+
gene_range = gene.chr_range
|
164
350
|
gene_chromosome = gene.chromosome
|
165
351
|
|
166
352
|
if gene_range.nil?
|
@@ -169,19 +355,163 @@ module GenomicMutation
|
|
169
355
|
chromosome.zip(position).collect{|chr,pos| chr == gene_chromosome and gene_range.include? pos}
|
170
356
|
end
|
171
357
|
end
|
172
|
-
persist :
|
358
|
+
#persist :_ary_over_gene?
|
173
359
|
|
174
360
|
property :affected_exons => :array2single do
|
175
|
-
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.
|
361
|
+
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.chunked_values_at self
|
362
|
+
end
|
363
|
+
#persist :affected_exons
|
364
|
+
|
365
|
+
#property :transcripts_with_affected_splicing => :array2single do
|
366
|
+
# exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
|
367
|
+
# transcripts = exon_junctions.collect{|junctions|
|
368
|
+
# exons = junctions.nil? ? [] : junctions.collect{|exon_junction| exon_junction.split(":").first }
|
369
|
+
# exons.empty? ?
|
370
|
+
# [] : exon2transcript_index.chunked_values_at(exons).flatten
|
371
|
+
# }
|
372
|
+
# Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
|
373
|
+
#end
|
374
|
+
|
375
|
+
#property :in_exon_junction? => :array2single do
|
376
|
+
# exon_position_index ||= GenomicMutation.exon_position_index(organism)
|
377
|
+
|
378
|
+
# start_pos = exon_position_index.identify_field "Exon Chr Start"
|
379
|
+
# strand_pos = exon_position_index.identify_field "Exon Strand"
|
380
|
+
# all_exons = self.genes.flatten.transcripts.compact.flatten.collect{|t| t.exons}.compact.flatten.uniq.select{|e| exon_position_index.include?(e) }.sort_by{|e| exon_position_index[e][start_pos] }
|
381
|
+
|
382
|
+
# first_exon = all_exons.first
|
383
|
+
# last_exon = all_exons.last
|
384
|
+
|
385
|
+
# exon_junctions.collect{|l|
|
386
|
+
# l.select{|j|
|
387
|
+
# exon, junction_type = j.split(":")
|
388
|
+
# if not exon_position_index.include? exon
|
389
|
+
# raise "Exon #{ exon } not in position index"
|
390
|
+
# end
|
391
|
+
# strand = exon_position_index[exon][strand_pos]
|
392
|
+
# case
|
393
|
+
# when (strand == 1 and exon == first_exon and junction_type =~ /acceptor/)
|
394
|
+
# false
|
395
|
+
# when (strand == 1 and exon == last_exon and junction_type =~ /donor/)
|
396
|
+
# false
|
397
|
+
# when (strand == -1 and exon == first_exon and junction_type =~ /donor/)
|
398
|
+
# false
|
399
|
+
# when (strand == -1 and exon == last_exon and junction_type =~ /acceptor/)
|
400
|
+
# false
|
401
|
+
# else
|
402
|
+
# true
|
403
|
+
# end
|
404
|
+
# }
|
405
|
+
# }.collect{|l| not l.nil? and not l.empty?}
|
406
|
+
#end
|
407
|
+
|
408
|
+
|
409
|
+
property :transcripts_with_affected_splicing => :array2single do
|
410
|
+
exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
|
411
|
+
transcript_exon_rank = GenomicMutation.exon_rank_index(organism)
|
412
|
+
|
413
|
+
transcripts = exon_junctions.collect{|junctions|
|
414
|
+
if junctions.nil? or junctions.empty?
|
415
|
+
[]
|
416
|
+
else
|
417
|
+
junctions.collect{|junction|
|
418
|
+
exon, junction_type = junction.split(":")
|
419
|
+
transcripts = exon2transcript_index[exon]
|
420
|
+
transcripts.select do |transcript|
|
421
|
+
transcript_info = transcript_exon_rank[transcript]
|
422
|
+
|
423
|
+
total_exons = transcript_info[0].length
|
424
|
+
rank = transcript_info[1][transcript_info[0].index(exon)].to_i
|
425
|
+
|
426
|
+
case
|
427
|
+
when (rank == 1 and junction_type =~ /acceptor/)
|
428
|
+
false
|
429
|
+
when (rank == total_exons and junction_type =~ /donor/)
|
430
|
+
false
|
431
|
+
else
|
432
|
+
true
|
433
|
+
end
|
434
|
+
end
|
435
|
+
}.flatten
|
436
|
+
end
|
437
|
+
}
|
438
|
+
Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
|
439
|
+
end
|
440
|
+
|
441
|
+
property :in_exon_junction? => :array2single do |*args|
|
442
|
+
gene = args.first
|
443
|
+
if gene
|
444
|
+
transcripts_with_affected_splicing.collect{|list| list.nil? ? false : list.gene.include?(gene)}
|
445
|
+
else
|
446
|
+
transcripts_with_affected_splicing.collect{|list| list.nil? ? false : list.any?}
|
447
|
+
end
|
448
|
+
end
|
449
|
+
|
450
|
+
property :affected_transcripts => :array2single do
|
451
|
+
exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
|
452
|
+
transcripts = affected_exons.collect{|exons|
|
453
|
+
exons = [] if exons.nil?
|
454
|
+
exons.empty? ?
|
455
|
+
[] : exon2transcript_index.chunked_values_at(exons).flatten
|
456
|
+
}
|
457
|
+
Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
|
458
|
+
end
|
459
|
+
#persist :affected_transcripts
|
460
|
+
|
461
|
+
|
462
|
+
property :coding? => :array2single do
|
463
|
+
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.
|
464
|
+
chunked_values_at(self).
|
465
|
+
collect{|exons|
|
466
|
+
GenomicMutation.transcripts_for_exon_index(organism).values_at(*exons).compact.flatten.any?
|
467
|
+
}
|
176
468
|
end
|
177
|
-
persist :affected_exons
|
178
469
|
|
179
470
|
property :damaging? => :array2single do |*args|
|
180
|
-
|
181
|
-
|
182
|
-
|
471
|
+
all_mutated_isoforms = mutated_isoforms.compact.flatten
|
472
|
+
damaged_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.damaged?(*args)}
|
473
|
+
exon_junctions.zip(mutated_isoforms, self.type).collect do |exs, mis, type|
|
474
|
+
(Array === exs and exs.any? and not type == "none") or
|
183
475
|
(Array === mis and (damaged_mutated_isoforms & mis).any?)
|
184
476
|
end
|
185
477
|
end
|
186
|
-
persist :damaging?
|
478
|
+
#persist :damaging?
|
479
|
+
|
480
|
+
property :worst_consequence => :array2single do |*args|
|
481
|
+
gene = args.first
|
482
|
+
|
483
|
+
all_mutated_isoforms = mutated_isoforms.compact.flatten
|
484
|
+
|
485
|
+
all_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.transcript.gene == gene} if gene
|
486
|
+
|
487
|
+
non_synonymous_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.non_synonymous}
|
488
|
+
truncated_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.truncated}
|
489
|
+
damage_scores = Misc.process_to_hash(non_synonymous_mutated_isoforms){|mis| mis.any? ? mis.damage_scores : []}
|
490
|
+
damaged = all_mutated_isoforms.select{|mi| mi.damaged? }
|
491
|
+
|
492
|
+
in_exon_junction?(gene).zip(mutated_isoforms, type).collect{|ej,mis,type|
|
493
|
+
case
|
494
|
+
when (mis.nil? or mis.subset(non_synonymous_mutated_isoforms).empty? and ej and not type == 'none')
|
495
|
+
"In Exon Junction"
|
496
|
+
when (Array === mis and mis.subset(truncated_mutated_isoforms).any?)
|
497
|
+
mis.subset(truncated_mutated_isoforms).first
|
498
|
+
when (Array === mis and mis.subset(non_synonymous_mutated_isoforms).any?)
|
499
|
+
mis.subset(non_synonymous_mutated_isoforms).sort{|mi1, mi2|
|
500
|
+
ds1 = damage_scores[mi1] || 0
|
501
|
+
ds2 = damage_scores[mi2] || 0
|
502
|
+
case
|
503
|
+
when (damaged.include?(mi1) == damaged.include?(mi2))
|
504
|
+
d1 = mi1.protein.interpro_domains || []
|
505
|
+
d2 = mi2.protein.interpro_domains || []
|
506
|
+
d1.length <=> d2.length
|
507
|
+
else
|
508
|
+
ds1 <=> ds2
|
509
|
+
end
|
510
|
+
}.last
|
511
|
+
else
|
512
|
+
nil
|
513
|
+
end
|
514
|
+
}
|
515
|
+
end
|
187
516
|
end
|
517
|
+
|
data/lib/rbbt/entity/genotype.rb
CHANGED
@@ -51,14 +51,20 @@ module Genotype
|
|
51
51
|
|
52
52
|
def jobname
|
53
53
|
if @jobname.nil?
|
54
|
-
|
55
|
-
|
54
|
+
if self.length > 3
|
55
|
+
@jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname}[0..2] * ", " + " + #{self.length - 3} more"
|
56
|
+
else
|
57
|
+
@jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname} * ", "
|
58
|
+
end
|
59
|
+
@jobname[100..-1] = " (etc. #{self.length} genotypes)" if @jobname.length > 135
|
56
60
|
end
|
57
61
|
@jobname
|
58
62
|
end
|
59
63
|
|
60
64
|
def metagenotype
|
61
|
-
|
65
|
+
organism = self.collect{|g| g.organism}.compact.first
|
66
|
+
orig_watson = self.collect{|g| g.orig_watson}.compact.first
|
67
|
+
GenomicMutation.setup(self.dup.flatten, jobname, organism, orig_watson).extend Genotype
|
62
68
|
end
|
63
69
|
|
64
70
|
|
@@ -123,7 +129,7 @@ module Genotype
|
|
123
129
|
returns "Ensembl Gene ID"
|
124
130
|
task :with_non_synonymous_mutations => :array do
|
125
131
|
set_info :organism, genotype.organism
|
126
|
-
genotype.mutated_isoforms.flatten.compact.
|
132
|
+
genotype.mutated_isoforms.flatten.compact.select{|mutated_isoform| mutated_isoform.non_synonymous}.transcript.gene.uniq
|
127
133
|
end
|
128
134
|
|
129
135
|
returns "Ensembl Gene ID"
|