rbbt-entities 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,17 @@
1
- require 'rbbt/entity'
2
1
  require 'rbbt/workflow'
3
- require 'rbbt/sources/organism'
4
- require 'rbbt/mutation/mutation_assessor'
2
+
3
+ require 'rbbt/entity'
5
4
  require 'rbbt/entity/protein'
6
5
  require 'rbbt/entity/gene'
7
6
  require 'rbbt/entity/mutated_isoform'
8
7
 
8
+ require 'rbbt/sources/organism'
9
+ require 'rbbt/sources/genomes1000'
10
+ require 'rbbt/sources/COSMIC'
11
+ require 'rbbt/sources/dbSNP'
12
+
13
+ require 'rbbt/mutation/mutation_assessor'
14
+
9
15
  Workflow.require_workflow "Sequence"
10
16
 
11
17
  module GenomicMutation
@@ -14,22 +20,25 @@ module GenomicMutation
14
20
  self.annotation :organism
15
21
  self.annotation :watson
16
22
 
23
+ self.masked_annotations = [:jobname]
24
+
17
25
  self.format = "Genomic Mutation"
18
26
 
19
27
  property :guess_watson => :array do
20
- if Array === self
21
- @watson = Sequence.job(:is_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
22
- else
23
- @watson = Sequence.job(:is_watson, jobname, :mutations => [self.clean_annotations], :organism => organism).run
24
- end
28
+ if Array === self
29
+ @watson = Sequence.job(:is_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
30
+ else
31
+ @watson = Sequence.job(:is_watson, jobname, :mutations => [self.clean_annotations], :organism => organism).run
32
+ end
25
33
  end
26
- persist :guess_watson
34
+ #persist :guess_watson
27
35
 
28
36
  def watson
29
- if @watson.nil?
37
+ if @watson.nil? and Array === self
30
38
  @watson = :missing
31
39
  @watson = guess_watson
32
40
  end
41
+ @watson = false if @watson == "false"
33
42
  @watson
34
43
  end
35
44
 
@@ -37,45 +46,168 @@ module GenomicMutation
37
46
  @watson
38
47
  end
39
48
 
49
+ def self.exon_rank_index(organism)
50
+ @@exon_rank_indices ||= {}
51
+ @@exon_rank_indices[organism] ||= Organism.transcript_exons(organism).tsv :persist => true, :type => :double, :unnamed => true
52
+ end
53
+
54
+ def self.exon_position_index(organism)
55
+ @@exon_position_indices ||= {}
56
+ @@exon_position_indices[organism] ||= Organism.exons(organism).tsv :persist => true, :type => :list, :cast => :to_i, :fields => ["Exon Strand", "Exon Chr Start", "Exon Chr End"], :unnamed => true
57
+ end
58
+
59
+ def self.transcripts_for_exon_index(organism)
60
+ @@transcript_for_exon_indices ||= {}
61
+ @@transcript_for_exon_indices[organism] ||= Organism.transcript_exons(organism).tsv :persist => true, :type => :flat, :key_field => "Ensembl Exon ID", :fields => ["Ensembl Transcript ID"], :unnamed => true
62
+ end
63
+
64
+ def self.genomes_1000_index(organism)
65
+ build = Organism.hg_build(organism)
66
+ @@genomes_1000_index ||= {}
67
+ @@genomes_1000_index[build] ||= Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"].tsv :key_field => "Genomic Mutation", :unnamed => true, :fields => ["Variant ID"], :type => :single, :persist => true
68
+ end
69
+
70
+ def self.COSMIC_index(organism)
71
+ build = Organism.hg_build(organism)
72
+ field = {
73
+ "hg19" => "Mutation GRCh37 genome position",
74
+ "hg18" => "Mutation NCBI36 genome position",
75
+
76
+ }[build]
77
+ @@COSMIC_index ||= {}
78
+ @@COSMIC_index[build] ||= COSMIC.Mutations.tsv :key_field => field, :unnamed => true, :fields => ["Mutation ID"], :type => :single, :persist => true
79
+ end
80
+
81
+ def self.dbSNP_index(organism)
82
+ build = Organism.hg_build(organism)
83
+ @@dbSNP_index ||= {}
84
+ @@dbSNP_index[build] ||= DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"].tsv :key_field => "Genomic Mutation", :unnamed => true, :type => :single, :persist => true
85
+ end
86
+
87
+ def self.dbSNP_position_index(organism)
88
+ build = Organism.hg_build(organism)
89
+
90
+ @@dbSNP_position_index ||= {}
91
+
92
+ @@dbSNP_position_index[build] ||= TSV.open(
93
+ CMD::cmd('sed "s/\([[:alnum:]]\+\):\([[:digit:]]\+\):\([ACTG+-]\+\)/\1:\2/" ', :in => DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"].open, :pipe => true),
94
+ :key_field => "Genomic Mutation", :unnamed => true, :type => :single, :persist => true)
95
+
96
+ end
97
+
98
+ property :bases_in_range => :single2array do |range|
99
+ start = range.begin+position-1
100
+ eend = range.end - range.begin + 1
101
+ File.open(Organism[organism]["chromosome_#{chromosome}"].find) do |f|
102
+ f.seek start
103
+ f.read eend
104
+ end
105
+ end
106
+
107
+ property :dbSNP_position => :array2single do
108
+ index ||= GenomicMutation.dbSNP_position_index(organism)
109
+ index.chunked_values_at self.collect{|m| m.split(":")[0..1] * ":" }
110
+ end
111
+
112
+
113
+ property :dbSNP => :array2single do
114
+ index ||= GenomicMutation.dbSNP_index(organism)
115
+ index.chunked_values_at self.collect{|m| m.split(":")[0..2] * ":" }
116
+ end
117
+
118
+ property :genomes_1000 => :array2single do
119
+ index ||= GenomicMutation.genomes_1000_index(organism)
120
+ index.chunked_values_at self.collect{|m| m.split(":")[0..2] * ":" }
121
+ end
122
+
123
+ property :COSMIC => :array2single do
124
+ index ||= GenomicMutation.COSMIC_index(organism)
125
+ index.chunked_values_at self.collect{|m| m.split(":").values_at(0,1,1) * ":" }
126
+ end
127
+
40
128
  property :ensembl_browser => :single2array do
41
129
  "http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{position - 100}-#{position + 100}"
42
130
  end
43
- persist :ensembl_browser
131
+ #persist :ensembl_browser
44
132
 
45
133
  property :chromosome => :array2single do
46
134
  self.clean_annotations.collect{|mut| mut.split(":")[0]}
47
135
  end
48
- persist :chromosome
136
+ #persist :_ary_chromosome
49
137
 
50
138
  property :position => :array2single do
51
139
  self.clean_annotations.collect{|mut| mut.split(":")[1].to_i}
52
140
  end
53
- persist :position
141
+ #persist :_ary_position
54
142
 
55
143
  property :base => :array2single do
56
144
  self.clean_annotations.collect{|mut| mut.split(":")[2]}
57
145
  end
58
- persist :base
146
+ #persist :_ary_base
59
147
 
60
148
  property :reference => :array2single do
61
149
  Sequence.reference_allele_at_chr_positions(organism, chromosome, position)
62
150
  end
63
- persist :reference
151
+ #persist :_ary_reference
152
+
153
+ property :gene_strand_reference => :array2single do
154
+ genes = self.genes
155
+ gene_strand = Misc.process_to_hash(genes.compact.flatten){|list| list.any? ? list.strand : []}
156
+ reverse = genes.collect{|list| not list.nil? and list.select{|gene| gene_strand[gene].to_s == "-1" }.any? }
157
+ forward = genes.collect{|list| not list.nil? and list.select{|gene| gene_strand[gene].to_s == "1" }.any? }
158
+ reference.zip(reverse, forward, base).collect{|reference,reverse, forward, base|
159
+ case
160
+ when (reverse and not forward)
161
+ Misc::BASE2COMPLEMENT[reference]
162
+ when (forward and not reverse)
163
+ reference
164
+ else
165
+ base == reference ? Misc::BASE2COMPLEMENT[reference] : reference
166
+ end
167
+ }
168
+ end
169
+ #persist :_ary_gene_strand_reference
170
+
171
+ # DID NOT TRY THIS. It's supposed to deal with ambiguous gene overlaps by
172
+ # taking the first coding gene, if any
173
+ #property :gene_strand_reference => :array2single do
174
+ # genes = self.genes
175
+ # gene_strand = Misc.process_to_hash(genes.compact.flatten){|list| list.strand }
176
+ # gene_biotype = Misc.process_to_hash(genes.compact.flatten){|list| list.biotype }
177
+ # reference.zip(genes).collect{|reference,genes|
178
+ # case
179
+ # when (genes.nil? or genes.empty?)
180
+ # reference
181
+ # when genes.length == 1
182
+ # gene_strand[genes[0]] == "-1" ? Misc::BASE2COMPLEMENT[reference] : reference
183
+ # else
184
+ # coding_genes = genes.zip(gene_strand.values_at(*genes)).select{|gene,strand| gene_biotype[gene] == "protein_coding"}
185
+
186
+ # if coding_genes.empty?
187
+ # reference
188
+ # else
189
+ # coding_genes[0][1] == "-1" ? Misc::BASE2COMPLEMENT[reference] : reference
190
+ # end
191
+ # end
192
+ # }
193
+ #end
194
+ ##persist :_ary_gene_strand_reference
195
+
64
196
 
65
197
  property :score => :array2single do
66
198
  self.clean_annotations.collect{|mut| mut.split(":")[3].to_f}
67
199
  end
68
- persist :score
200
+ #persist :_ary_score
69
201
 
70
202
  property :remove_score => :array2single do
71
203
  self.annotate(self.collect{|mut| mut.split(":")[0..2] * ":"})
72
204
  end
73
- persist :remove_score
205
+ #persist :remove_score
74
206
 
75
207
  property :noscore => :single2array do
76
208
  self.annotate self.clean_annotations.collect{|mut| mut.split(":")[0..2]}
77
209
  end
78
- persist :noscore
210
+ #persist :noscore
79
211
 
80
212
  property :to_watson => :array2single do
81
213
  if watson
@@ -86,15 +218,19 @@ module GenomicMutation
86
218
  result
87
219
  end
88
220
  end
89
- persist :to_watson
221
+ #persist :to_watson
90
222
 
91
223
  property :reference => :array2single do
92
- Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism).run.values_at *self
224
+ tsv = Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism, :unnamed => true).run
225
+ tsv.chunked_values_at self
93
226
  end
94
- persist :reference
227
+ #persist :reference
95
228
 
96
229
  property :type => :array2single do
230
+ reference = watson ? self.reference : self.gene_strand_reference
231
+
97
232
  self.base.zip(reference).collect do |base,reference|
233
+
98
234
  type = case
99
235
  when base == reference
100
236
  "none"
@@ -103,22 +239,23 @@ module GenomicMutation
103
239
  when (base.length > 1 or base == '-')
104
240
  "indel"
105
241
  when (not %w(A G T C).include? base and not %w(A G T C).include? reference)
106
- nil
107
- when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
108
- "transition"
109
- when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
110
- "transition"
111
- when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
242
+ "unknown"
243
+ when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
112
244
  "transversion"
113
- when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
245
+ when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
114
246
  "transversion"
247
+ when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || [nil]) & ["T", "C", nil]).empty?)
248
+ "transition"
249
+ when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || [nil]) & ["A", "G", nil]).empty?)
250
+ "transition"
115
251
  else
116
- "unknown [#{[base, reference] * " - "}]"
252
+ "unknown"
117
253
  end
118
254
  type
119
255
  end
256
+
120
257
  end
121
- persist :type
258
+ #persist :type
122
259
 
123
260
  property :offset_in_genes => :array2single do
124
261
  gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
@@ -129,38 +266,87 @@ module GenomicMutation
129
266
  }.compact
130
267
  }
131
268
  end
132
- persist :offset_in_genes
269
+ #persist :offset_in_genes
133
270
 
134
271
  property :genes => :array2single do
135
- genes = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run
136
- genes.unnamed = true
137
- genes = genes.values_at *self
272
+ genes_tsv = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run
273
+ genes_tsv.unnamed = true
274
+ genes = nil
275
+ genes = genes_tsv.chunked_values_at self
138
276
  Gene.setup(genes, "Ensembl Gene ID", organism)
139
277
  end
140
- persist :genes
278
+ #persist :_ary_genes
279
+
280
+ property :affected_genes => :array2single do
281
+ _mutated_isoforms = mutated_isoforms
282
+ mi_gene = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.protein.gene}
283
+ from_protein = mutated_isoforms.collect{|mis|
284
+ genes = mis.nil? ? [] : mi_gene.values_at(*mis).compact
285
+ Gene.setup(genes.uniq, "Ensembl Gene ID", organism)
286
+ }
287
+ is_exon_junction = self.in_exon_junction?.zip(self.type).collect{|in_ex,type| in_ex and type != "none"}
288
+ genes_with_altered_splicing = self.transcripts_with_affected_splicing.collect{|transcripts| transcripts.gene}
289
+ from_protein.each_with_index do |list, i|
290
+ if is_exon_junction[i] and genes_with_altered_splicing[i]
291
+ list.concat genes_with_altered_splicing[i]
292
+ list.uniq!
293
+ end
294
+ end
295
+ Gene.setup(from_protein, "Ensembl Gene ID", organism)
296
+ end
297
+ #persist :_ary_affected_genes
298
+
299
+
300
+ property :relevant? => :array2single do
301
+ affected_genes.collect{|list| list and list.any?}
302
+ end
303
+
304
+ property :damaged_genes => :array2single do |*args|
305
+ _mutated_isoforms = mutated_isoforms
306
+ mi_damaged = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.damaged?(*args)}
307
+ mi_gene = Misc.process_to_hash(MutatedIsoform.setup(_mutated_isoforms.compact.flatten.uniq, organism)){|mis| mis.protein.gene}
308
+ from_protein = _mutated_isoforms.collect{|mis|
309
+ genes = mis.nil? ? [] : mi_gene.values_at(*mis.select{|mi| mi_damaged[mi]}).compact
310
+ Gene.setup(genes.uniq, "Ensembl Gene ID", organism)
311
+ }
312
+
313
+ ej_transcripts = transcripts_with_affected_splicing
314
+ _type = self.type
315
+
316
+ from_protein.each_with_index do |list, i|
317
+ if ej_transcripts[i] and ej_transcripts[i].any? and _type[i] != 'none'
318
+ list.concat ej_transcripts[i].gene
319
+ list.uniq!
320
+ end
321
+ end
322
+
323
+ Gene.setup(from_protein, "Ensembl Gene ID", organism)
324
+ end
325
+ #persist :_ary_damaged_genes
141
326
 
142
327
  property :mutated_isoforms => :array2single do
143
- res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.values_at *self
328
+ res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.chunked_values_at self
144
329
  res.each{|list| list.organism = organism unless list.nil?}
145
330
  res.compact[0].annotate res if res.compact[0].respond_to? :annotate
146
331
  res
147
332
  end
148
- persist :mutated_isoforms
333
+ #persist :_ary_mutated_isoforms
149
334
 
150
335
  property :exon_junctions => :array do
151
- Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at(*self)
336
+ Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.chunked_values_at(self)
152
337
  end
153
- persist :exon_junctions
338
+ #persist :exon_junctions
339
+ #persist :_ary_in_exon_junction?
154
340
 
155
- property :in_exon_junction? => :array2single do
156
- exon_junctions.collect{|l| not l.nil? and not l.empty?}
341
+ property :over_range? => :array2single do |range|
342
+ chromosome.zip(position).collect{|chr,pos| chr == gene_chromosome and range.include? pos}
157
343
  end
158
- persist :in_exon_junction?
344
+ #persist :_ary_over_range?
159
345
 
160
346
  property :over_gene? => :array2single do |gene|
161
347
  gene = Gene.setup(gene.dup, "Ensembl Gene ID", organism) unless Gene === gene
162
348
 
163
- gene_range = gene.range
349
+ gene_range = gene.chr_range
164
350
  gene_chromosome = gene.chromosome
165
351
 
166
352
  if gene_range.nil?
@@ -169,19 +355,163 @@ module GenomicMutation
169
355
  chromosome.zip(position).collect{|chr,pos| chr == gene_chromosome and gene_range.include? pos}
170
356
  end
171
357
  end
172
- persist :over_gene?
358
+ #persist :_ary_over_gene?
173
359
 
174
360
  property :affected_exons => :array2single do
175
- Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at *self
361
+ Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.chunked_values_at self
362
+ end
363
+ #persist :affected_exons
364
+
365
+ #property :transcripts_with_affected_splicing => :array2single do
366
+ # exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
367
+ # transcripts = exon_junctions.collect{|junctions|
368
+ # exons = junctions.nil? ? [] : junctions.collect{|exon_junction| exon_junction.split(":").first }
369
+ # exons.empty? ?
370
+ # [] : exon2transcript_index.chunked_values_at(exons).flatten
371
+ # }
372
+ # Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
373
+ #end
374
+
375
+ #property :in_exon_junction? => :array2single do
376
+ # exon_position_index ||= GenomicMutation.exon_position_index(organism)
377
+
378
+ # start_pos = exon_position_index.identify_field "Exon Chr Start"
379
+ # strand_pos = exon_position_index.identify_field "Exon Strand"
380
+ # all_exons = self.genes.flatten.transcripts.compact.flatten.collect{|t| t.exons}.compact.flatten.uniq.select{|e| exon_position_index.include?(e) }.sort_by{|e| exon_position_index[e][start_pos] }
381
+
382
+ # first_exon = all_exons.first
383
+ # last_exon = all_exons.last
384
+
385
+ # exon_junctions.collect{|l|
386
+ # l.select{|j|
387
+ # exon, junction_type = j.split(":")
388
+ # if not exon_position_index.include? exon
389
+ # raise "Exon #{ exon } not in position index"
390
+ # end
391
+ # strand = exon_position_index[exon][strand_pos]
392
+ # case
393
+ # when (strand == 1 and exon == first_exon and junction_type =~ /acceptor/)
394
+ # false
395
+ # when (strand == 1 and exon == last_exon and junction_type =~ /donor/)
396
+ # false
397
+ # when (strand == -1 and exon == first_exon and junction_type =~ /donor/)
398
+ # false
399
+ # when (strand == -1 and exon == last_exon and junction_type =~ /acceptor/)
400
+ # false
401
+ # else
402
+ # true
403
+ # end
404
+ # }
405
+ # }.collect{|l| not l.nil? and not l.empty?}
406
+ #end
407
+
408
+
409
+ property :transcripts_with_affected_splicing => :array2single do
410
+ exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
411
+ transcript_exon_rank = GenomicMutation.exon_rank_index(organism)
412
+
413
+ transcripts = exon_junctions.collect{|junctions|
414
+ if junctions.nil? or junctions.empty?
415
+ []
416
+ else
417
+ junctions.collect{|junction|
418
+ exon, junction_type = junction.split(":")
419
+ transcripts = exon2transcript_index[exon]
420
+ transcripts.select do |transcript|
421
+ transcript_info = transcript_exon_rank[transcript]
422
+
423
+ total_exons = transcript_info[0].length
424
+ rank = transcript_info[1][transcript_info[0].index(exon)].to_i
425
+
426
+ case
427
+ when (rank == 1 and junction_type =~ /acceptor/)
428
+ false
429
+ when (rank == total_exons and junction_type =~ /donor/)
430
+ false
431
+ else
432
+ true
433
+ end
434
+ end
435
+ }.flatten
436
+ end
437
+ }
438
+ Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
439
+ end
440
+
441
+ property :in_exon_junction? => :array2single do |*args|
442
+ gene = args.first
443
+ if gene
444
+ transcripts_with_affected_splicing.collect{|list| list.nil? ? false : list.gene.include?(gene)}
445
+ else
446
+ transcripts_with_affected_splicing.collect{|list| list.nil? ? false : list.any?}
447
+ end
448
+ end
449
+
450
+ property :affected_transcripts => :array2single do
451
+ exon2transcript_index = GenomicMutation.transcripts_for_exon_index(organism)
452
+ transcripts = affected_exons.collect{|exons|
453
+ exons = [] if exons.nil?
454
+ exons.empty? ?
455
+ [] : exon2transcript_index.chunked_values_at(exons).flatten
456
+ }
457
+ Transcript.setup(transcripts, "Ensembl Transcript ID", organism)
458
+ end
459
+ #persist :affected_transcripts
460
+
461
+
462
+ property :coding? => :array2single do
463
+ Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.
464
+ chunked_values_at(self).
465
+ collect{|exons|
466
+ GenomicMutation.transcripts_for_exon_index(organism).values_at(*exons).compact.flatten.any?
467
+ }
176
468
  end
177
- persist :affected_exons
178
469
 
179
470
  property :damaging? => :array2single do |*args|
180
- damaged_mutated_isoforms = mutated_isoforms.compact.flatten.select{|mi| mi.damaged?(*args)}
181
- exon_junctions.zip(mutated_isoforms).collect do |exs, mis|
182
- (Array === exs and exs.any?) or
471
+ all_mutated_isoforms = mutated_isoforms.compact.flatten
472
+ damaged_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.damaged?(*args)}
473
+ exon_junctions.zip(mutated_isoforms, self.type).collect do |exs, mis, type|
474
+ (Array === exs and exs.any? and not type == "none") or
183
475
  (Array === mis and (damaged_mutated_isoforms & mis).any?)
184
476
  end
185
477
  end
186
- persist :damaging?
478
+ #persist :damaging?
479
+
480
+ property :worst_consequence => :array2single do |*args|
481
+ gene = args.first
482
+
483
+ all_mutated_isoforms = mutated_isoforms.compact.flatten
484
+
485
+ all_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.transcript.gene == gene} if gene
486
+
487
+ non_synonymous_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.non_synonymous}
488
+ truncated_mutated_isoforms = all_mutated_isoforms.select{|mi| mi.truncated}
489
+ damage_scores = Misc.process_to_hash(non_synonymous_mutated_isoforms){|mis| mis.any? ? mis.damage_scores : []}
490
+ damaged = all_mutated_isoforms.select{|mi| mi.damaged? }
491
+
492
+ in_exon_junction?(gene).zip(mutated_isoforms, type).collect{|ej,mis,type|
493
+ case
494
+ when (mis.nil? or mis.subset(non_synonymous_mutated_isoforms).empty? and ej and not type == 'none')
495
+ "In Exon Junction"
496
+ when (Array === mis and mis.subset(truncated_mutated_isoforms).any?)
497
+ mis.subset(truncated_mutated_isoforms).first
498
+ when (Array === mis and mis.subset(non_synonymous_mutated_isoforms).any?)
499
+ mis.subset(non_synonymous_mutated_isoforms).sort{|mi1, mi2|
500
+ ds1 = damage_scores[mi1] || 0
501
+ ds2 = damage_scores[mi2] || 0
502
+ case
503
+ when (damaged.include?(mi1) == damaged.include?(mi2))
504
+ d1 = mi1.protein.interpro_domains || []
505
+ d2 = mi2.protein.interpro_domains || []
506
+ d1.length <=> d2.length
507
+ else
508
+ ds1 <=> ds2
509
+ end
510
+ }.last
511
+ else
512
+ nil
513
+ end
514
+ }
515
+ end
187
516
  end
517
+
@@ -51,14 +51,20 @@ module Genotype
51
51
 
52
52
  def jobname
53
53
  if @jobname.nil?
54
- @jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname} * ", "
55
- @jobname[100..-1] = " (etc; #{self.length} genotypes)" if @jobname.length > 100
54
+ if self.length > 3
55
+ @jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname}[0..2] * ", " + " + #{self.length - 3} more"
56
+ else
57
+ @jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname} * ", "
58
+ end
59
+ @jobname[100..-1] = " (etc. #{self.length} genotypes)" if @jobname.length > 135
56
60
  end
57
61
  @jobname
58
62
  end
59
63
 
60
64
  def metagenotype
61
- GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].orig_watson).extend Genotype
65
+ organism = self.collect{|g| g.organism}.compact.first
66
+ orig_watson = self.collect{|g| g.orig_watson}.compact.first
67
+ GenomicMutation.setup(self.dup.flatten, jobname, organism, orig_watson).extend Genotype
62
68
  end
63
69
 
64
70
 
@@ -123,7 +129,7 @@ module Genotype
123
129
  returns "Ensembl Gene ID"
124
130
  task :with_non_synonymous_mutations => :array do
125
131
  set_info :organism, genotype.organism
126
- genotype.mutated_isoforms.flatten.compact.reject{|mutated_isoform| ["SYNONYMOUS", "UTR"].include? mutated_isoform.consequence}.transcript.gene.uniq
132
+ genotype.mutated_isoforms.flatten.compact.select{|mutated_isoform| mutated_isoform.non_synonymous}.transcript.gene.uniq
127
133
  end
128
134
 
129
135
  returns "Ensembl Gene ID"
@@ -0,0 +1,6 @@
1
+
2
+ module Interactor
3
+ extend Annotation
4
+ self.annotation :interaction_method
5
+ self.annotation :interaction_evidence
6
+ end