rbbt-study 0.2.30 → 0.2.31

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,34 +0,0 @@
# Mutation-set properties of a study, all derived from its genotype cohort.
module Study
  # Sorted metagenotype of the cohort, labelled with a job name. Organism and
  # watson are taken from the study only when the set does not carry them yet.
  property :all_mutations do
    mutations = cohort.metagenotype.sort
    mutations.jobname = "All mutations in #{self}"
    mutations.organism ||= organism
    mutations.watson ||= watson
    mutations
  end

  # Subset of all_mutations for which `relevant?` holds.
  property :relevant_mutations do
    relevant = all_mutations.select_by(:relevant?)
    relevant.jobname = "Relevant mutations in #{self}"
    relevant
  end

  # Subset of relevant_mutations for which `damaging?(*args)` holds.
  property :damaging_mutations do |*args|
    damaging = relevant_mutations.select_by(:damaging?, *args)
    damaging.jobname = "Damaging mutations in #{self}"
    damaging
  end

  # Relevant mutations having at least one mutated isoform whose consequence is
  # not "SYNONYMOUS". Mutations without mutated isoforms are dropped.
  property :mutations_altering_isoform_sequence do
    altering = relevant_mutations.select do |mutation|
      isoforms = mutation.mutated_isoforms
      next false if isoforms.nil?

      isoforms.select { |isoform| isoform.consequence != "SYNONYMOUS" }.any?
    end
    altering.jobname = "Mutations altering isoform sequence in #{self}"
    altering
  end

  # Relevant mutations with at least one transcript whose splicing is affected.
  property :mutations_affecting_splicing_sites do
    splicing = relevant_mutations.select_by(:transcripts_with_affected_splicing) { |transcripts| transcripts.any? }
    splicing.jobname = "Mutations affecting splicing sites in #{self}"
    splicing
  end

  # Mutations (relevant or not) whose gene list includes +gene+.
  property :mutations_over_gene do |gene|
    all_mutations.select_by(:genes) { |genes| genes && genes.include?(gene) }
  end

  # Mutations (relevant or not) whose gene list shares any gene with +list+.
  property :mutations_over_gene_list do |list|
    all_mutations.select_by(:genes) { |genes| genes && (genes & list).any? }
  end
end
@@ -1,28 +0,0 @@
# Genotype-related properties of a sample, looked up in its study's cohort.
module Sample
  # One boolean per sample: whether the study cohort holds an entry for it.
  property :has_genotype? => :array2single do
    genotypes = study.cohort.values_at(*self)
    genotypes.collect { |genotype| !genotype.nil? }
  end

  # The cohort entry for this sample. `Study.setup` is applied to the study
  # before the lookup, as in the original.
  property :mutations do
    Study.setup(study)
    study.cohort[self]
  end

  # Mutations of this sample for which `relevant?` holds.
  property :relevant_mutations do
    mutations.select_by(:relevant?)
  end

  # Mutations of this sample for which `damaging?(*args)` holds.
  property :damaging_mutations do |*args|
    mutations.select_by(:damaging?, *args)
  end

  # Unique genes affected by any mutation of this sample.
  property :affected_genes do
    per_mutation = mutations.affected_genes
    per_mutation.compact.flatten.uniq
  end

  # Unique genes damaged by any mutation of this sample; +args+ are forwarded
  # to `damaged_genes`.
  property :damaged_genes do |*args|
    per_mutation = mutations.damaged_genes(*args)
    per_mutation.compact.flatten.uniq
  end
end
27
-
28
-
@@ -1,35 +0,0 @@
# Knowledge-base support for studies: a module-wide knowledge base and
# database registry, plus a lazily built knowledge base per study.
module Study

  class << self
    # Only writers are generated: the readers below are written by hand, so
    # attr_accessor would define readers that are immediately redefined.
    attr_writer :knowledge_base, :study_registry

    # Module-wide knowledge base, created on first access.
    def knowledge_base
      @knowledge_base ||= KnowledgeBase.new Rbbt.var.knowledge_base.Study
    end

    # Registry of databases (name => file or Proc) that every study-level
    # knowledge base inherits; starts out empty.
    def study_registry
      @study_registry ||= {}
    end
  end

  # Writer only; the memoizing reader is defined explicitly below.
  attr_writer :knowledge_base

  # Knowledge base of this study, built once and memoized. Every entry of
  # Study.study_registry is registered on it: plain entries are registered as
  # files, Proc entries are wrapped in a block that calls them with
  # (study, database).
  def knowledge_base
    @knowledge_base ||= begin
                          kb = KnowledgeBase.new(Rbbt.var.knowledge_base.studies[self], self.organism)
                          kb.format["Gene"] = "Ensembl Gene ID"
                          kb.entity_options["Sample"] = {"Study" => self}
                          Study.study_registry.each do |database, file|
                            Log.debug("Inheriting #{ database } from registry: #{Misc.fingerprint file}")
                            if Proc === file
                              # Capture the study in a local and use it inside
                              # the block, so the registry proc receives this
                              # study even if the block is later evaluated
                              # with a different `self`.
                              study = self
                              block = Proc.new{ file.call(study, database) }
                              kb.register database, nil, {}, &block
                            else
                              kb.register database, file
                            end
                          end
                          kb
                        end
  end
end
35
-
@@ -1,90 +0,0 @@
1
- require 'rbbt/entity/methylation'
2
-
3
- require 'rbbt/entity/study/methylation/samples'
4
-
# Workflow helper exposing the organism stored in the study metadata.
module StudyWorkflow
  helper(:organism) { study.metadata[:organism] }
end
10
-
# Access to the methylation files found under the study directory.
module Study
  # Whether the study directory has a methylation entry.
  def has_methylation?
    dir.methylation.exists?
  end

  # Every path matching "*" under the study's methylation directory.
  def methylation_files
    dir.methylation.find.glob("*")
  end

  # Hash of sample name => sorted list of methylation entries, one sample per
  # methylation file (the file basename is the sample name, each line of the
  # file is one entry). Loaded on first use and memoized.
  #
  # The hash is assembled in a local and only stored once every file has been
  # read, so a failure halfway through does not leave a partial cohort cached
  # for later calls.
  def methylation_cohort
    @methylation_cohort ||= begin
      loaded = {}
      methylation_files.each do |file|
        sample = File.basename(file)
        Sample.setup(sample, self)
        methylations = Open.read(file).split("\n").sort
        Methylation.setup(methylations, organism)
        loaded[sample] = methylations
      end
      loaded
    end
  end
end
34
-
# Study-level summaries over the samples that have methylation data.
module Study
  # Genes reported as lost in at least +threshold+ samples, counting only
  # samples for which `has_methylation?` holds. Returns the genes set up as
  # "Ensembl Gene ID" entities for the study organism.
  #
  # The leftover `puts sample` debugging output has been dropped.
  # NOTE(review): `lost_genes` is not among the methylation sample properties
  # visible alongside this code -- confirm it is the intended source.
  property :recurrently_lost_genes => :single do |threshold|
    counts = Hash.new(0)
    samples.each do |sample|
      next unless sample.has_methylation?

      sample.lost_genes.clean_annotations.each { |gene| counts[gene] += 1 }
    end

    recurrent = counts.select { |_gene, count| count >= threshold }.collect { |gene, _count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # Genes reported as gained in at least +threshold+ samples, counting only
  # samples for which `has_methylation?` holds. Returns the genes set up as
  # "Ensembl Gene ID" entities for the study organism.
  #
  # NOTE(review): `gained_genes` is not among the methylation sample
  # properties visible alongside this code -- confirm it is the intended
  # source.
  property :recurrently_gained_genes => :single do |threshold|
    counts = Hash.new(0)
    samples.each do |sample|
      next unless sample.has_methylation?

      sample.gained_genes.clean_annotations.each { |gene| counts[gene] += 1 }
    end

    recurrent = counts.select { |_gene, count| count >= threshold }.collect { |gene, _count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # TSV keyed by "Ensembl Gene ID" with one "TRUE"/"FALSE" column per sample
  # of the methylation cohort: "TRUE" when the gene appears among the genes of
  # that sample's methylation entries.
  property :gene_sample_methylation_matrix => :single do
    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)

    # Rows are indexed by position in methylation_cohort and the fields are
    # its sample names, so the row width has to come from methylation_cohort
    # (the original sized rows from the genotype `cohort`).
    num_samples = methylation_cohort.length
    sample_names = []

    methylation_cohort.each_with_index do |(sample, methylation), i|
      methylation.genes.compact.flatten.uniq.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][i] = "TRUE"
      end
      sample_names << sample
    end

    tsv.fields = sample_names

    tsv
  end
end
@@ -1,31 +0,0 @@
# Methylation-related properties of a sample.
module Sample
  # Returns the study's methylation cohort, or an empty hash when the study
  # has no methylation data.
  # NOTE(review): how an :array2single result is mapped back to individual
  # samples is decided by the property machinery, which is not visible here.
  property :methylation => :array2single do
    study.has_methylation? ? study.methylation_cohort : {}
  end

  # One boolean per sample: whether the study's methylation cohort has an
  # entry for it; all false when the study has no methylation data.
  property :has_methylation? => :array2single do
    if study.has_methylation?
      study.methylation_cohort.values_at(*self).collect { |methylation| !methylation.nil? }
    else
      [false] * self.length
    end
  end

  # Methylation entries of this sample for which `methylated?` holds; an empty
  # list when the sample has no methylation data. The nil check covers a
  # sample for which `methylation` yields nothing, which would otherwise fail
  # on `nil.empty?`.
  property :methylated => :single do
    entries = methylation
    if entries.nil? || entries.empty?
      []
    else
      entries.select_by(:methylated?)
    end
  end

  # Methylation entries of this sample for which `unmethylated?` holds; an
  # empty list when the sample has no methylation data (nil or empty).
  property :unmethylated => :single do
    entries = methylation
    if entries.nil? || entries.empty?
      []
    else
      entries.select_by(:unmethylated?)
    end
  end

  # Unique genes of the methylated entries, set up as "Ensembl Gene ID"
  # entities; a plain empty list when there are none.
  property :methylated_genes => :single do
    entries = methylated
    if entries.empty?
      []
    else
      Gene.setup(entries.genes.flatten.uniq, "Ensembl Gene ID", organism)
    end
  end

  # Unique genes of the unmethylated entries, set up as "Ensembl Gene ID"
  # entities; a plain empty list when there are none.
  property :unmethylated_genes => :single do
    entries = unmethylated
    if entries.empty?
      []
    else
      Gene.setup(entries.genes.flatten.uniq, "Ensembl Gene ID", organism)
    end
  end
end
30
-
31
-
@@ -1,259 +0,0 @@
# Groups the cohort's mutations by genomic change, keyed "REF>ALT" where ALT
# is the comma-joined list of bases encoded by the mutation's IUPAC code minus
# the reference. The reference comes from the mutation itself when `watson`
# is set, and from the gene strand otherwise. Returns a flat TSV.
task :mutations_by_change => :tsv do
  changes = {}

  study.cohort.each do |genotype|
    genotype.watson ||= watson

    genotype.each do |mutation|
      reference = if watson
                    mutation.reference
                  else
                    mutation.gene_strand_reference
                  end
      alternatives = (Misc::IUPAC2BASE[mutation.base] || []) - [reference]
      key = [reference, alternatives.join(",")].join(">")

      (changes[key] ||= []) << mutation.clean_annotations
    end
  end

  TSV.setup(changes, :key_field => "Genomic Change", :fields => ["Genomic Mutation"], :namespace => organism, :type => :flat)
  changes.entity_options = {:watson => watson}
  changes
end
22
-
23
-
# Number of mutations recorded for each genomic change, computed from the
# result of the mutations_by_change task.
dep :mutations_by_change
task :mutation_change_counts => :yaml do
  by_change = step(:mutations_by_change).load

  counts = {}
  by_change.each { |change, mutations| counts[change] = mutations.length }
  counts
end
34
-
# One task per mutation type. Each task collects, across every genotype of the
# study cohort, the mutations whose `type` equals the given string, and
# returns them as a "Genomic Mutation" set labelled "<study>: <label>".
#
# The five tasks used to be copy-pasted bodies differing only in the type
# string; they are generated from a table, like the consequence tasks defined
# further down in this file. Task names, type strings and labels are unchanged.
[
  [:transversions,     "transversion", "transversions"],
  [:transitions,       "transition",   "transitions"],
  [:indels,            "indel",        "indels"],
  [:unknown_mutations, "unknown",      "unknown_mutations"],
  [:not_mutations,     "none",         "not mutations"],
].each do |task_name, mutation_type, label|
  returns "Genomic Mutation"
  task task_name => :annotations do
    mutations = study.cohort.collect{|genotype|
      genotype.select{|mutation| mutation.type == mutation_type }
    }.flatten

    GenomicMutation.setup(mutations, "#{ study }: #{ label }", organism, watson)
  end
end
117
-
118
-
119
-
# Cohort mutations having at least one mutated isoform flagged as
# non-synonymous; mutations without mutated isoforms never qualify.
returns "Genomic Mutation"
task :non_synonymous_mutations => :annotations do
  selected = study.cohort.collect do |genotype|
    genotype.select do |mutation|
      isoforms = mutation.mutated_isoforms || []
      isoforms.select { |isoform| isoform.non_synonymous }.any?
    end
  end

  GenomicMutation.setup(selected.flatten, "#{study}: non_synonymous mutations", organism, watson)
end
135
-
# Cohort mutations left after removing, from every genotype, the result of the
# non_synonymous_mutations task.
dep :non_synonymous_mutations
returns "Genomic Mutation"
task :synonymous_mutations => :annotations do
  non_synonymous = step(:non_synonymous_mutations).load

  remaining = study.cohort.collect { |genotype| genotype.remove(non_synonymous) }

  GenomicMutation.setup(remaining.flatten, "#{study}: synonymous mutations", organism, watson)
end
149
-
150
- #dep :synonymous_mutations
151
- #dep :exon_junction_mutations
152
- #input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
153
- #returns "Genomic Mutation"
154
- #task :damaging_mutations => :annotations do |methods|
155
- # synonymous_mutations = step(:synonymous_mutations).load
156
- # exon_junction_mutations = step(:exon_junction_mutations).load
157
- #
158
- # mutations_to_remove = synonymous_mutations - exon_junction_mutations
159
- #
160
- # mutations = study.cohort.collect{|genotype|
161
- #
162
- # genotype.remove( mutations_to_remove ).select{|mutation| mutation.damaging?(methods) }
163
- #
164
- # }.flatten
165
- #
166
- # GenomicMutation.setup(mutations, "#{ study }: damaging mutations", organism, watson)
167
- #end
168
-
# Relevant mutations for which `damaging?(methods)` holds; +methods+ is the
# list of damage prediction methods given as task input.
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
returns "Genomic Mutation"
task :damaging_mutations => :annotations do |methods|
  relevant = step(:relevant_mutations).load
  damaging = relevant.select { |mutation| mutation.damaging?(methods) }

  GenomicMutation.setup(damaging, "#{study}: damaging mutations", organism, watson)
end
179
-
180
-
# Relevant, non-damaging mutations that have mutated isoforms, all of which
# lack damage scores (`damage_scores` is nil).
dep :damaging_mutations
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift]
returns "Genomic Mutation"
task :mutations_missing_predictions => :annotations do |methods|
  damaging = step(:damaging_mutations).load
  relevant = step(:relevant_mutations).load

  # Relevant mutations that were not called damaging.
  candidates = relevant.remove(damaging)

  # Isoforms of those mutations that carry no damage scores at all.
  candidate_isoforms = candidates.mutated_isoforms.compact.flatten
  unscored_isoforms = candidate_isoforms.select { |isoform| isoform.damage_scores.nil? }

  with_isoforms = candidates.select do |mutation|
    mutation.mutated_isoforms && mutation.mutated_isoforms.any?
  end

  # Keep a mutation only when nothing is left after removing the unscored
  # isoforms, i.e. every one of its isoforms is unscored.
  unpredicted = with_isoforms.select do |mutation|
    mutation.mutated_isoforms.remove(unscored_isoforms).empty?
  end

  GenomicMutation.setup(unpredicted, "#{study}: mutations missing predictions", organism, watson)
end
195
-
# Cohort mutations that affect the splicing of at least one transcript and
# whose type is not "none".
returns "Genomic Mutation"
task :exon_junction_mutations => :annotations do
  selected = study.cohort.collect do |genotype|
    genotype.select do |mutation|
      mutation.transcripts_with_affected_splicing.any? && mutation.type != "none"
    end
  end

  GenomicMutation.setup(selected.flatten, "#{study}: exon junction mutations", organism, watson)
end
207
-
# Union of the exon junction and non-synonymous mutations: the exon junction
# mutations first, followed by the non-synonymous ones not already among them.
dep :non_synonymous_mutations
dep :exon_junction_mutations
returns "Genomic Mutation"
task :relevant_mutations => :annotations do
  non_synonymous = step(:non_synonymous_mutations).load
  exon_junction = step(:exon_junction_mutations).load

  only_non_synonymous = non_synonymous.remove(exon_junction)
  combined = (exon_junction + only_non_synonymous).flatten

  GenomicMutation.setup(combined, "#{study}: relevant mutations", organism, watson)
end
219
-
# Relevant mutations that occur more than once, counted after `remove_score`
# has been applied to the relevant mutation set.
dep :relevant_mutations
returns "Genomic Mutation"
task :recurrent_mutations => :annotations do
  relevant = step(:relevant_mutations).load

  occurrences = Misc.counts(relevant.remove_score)
  repeated = occurrences.select { |_mutation, count| count > 1 }
  mutations = repeated.collect { |mutation, _count| mutation }

  GenomicMutation.setup(mutations, "#{study}: recurrent mutations", organism, watson)
end
233
-
# Hash of isoform consequence => mutations showing it. Only the non-synonymous
# mutations of each genotype are considered, and a mutation is listed once
# under each distinct consequence found among its mutated isoforms.
dep :non_synonymous_mutations
task :mutations_by_consequence => :yaml do
  non_synonymous = step(:non_synonymous_mutations).load

  by_consequence = {}

  study.cohort.each do |genotype|
    genotype.subset(non_synonymous).each do |mutation|
      isoforms = mutation.mutated_isoforms
      next if isoforms.nil?

      isoforms.consequence.compact.uniq.each do |consequence|
        (by_consequence[consequence] ||= []) << mutation
      end
    end
  end

  by_consequence
end
# One task per isoform consequence: each returns the mutations listed under
# that consequence in the result of mutations_by_consequence, or an empty set
# when there are none.
[
  ["missense_mutations",   "MISS-SENSE"],
  ["nonsense_mutations",   "NONSENSE"],
  ["frameshift_mutations", "FRAMESHIFT"],
  ["nostop_mutations",     "NOSTOP"],
  ["indel_mutations",      "INDEL"],
  ["utr_mutations",        "UTR"],
].each do |task_name, consequence|
  dep :mutations_by_consequence
  returns "Genomic Mutation"
  task task_name => :annotations do
    by_consequence = step(:mutations_by_consequence).load
    matching = by_consequence[consequence] || []

    GenomicMutation.setup(matching, "#{study}: mutations with #{consequence.downcase} isoform mutations", organism, watson)
  end
end
259
-