rbbt-study 0.2.30 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,34 +0,0 @@
# Genotype-derived mutation collections exposed as properties of a Study.
module Study
  # Sorted metagenotype of the cohort. The result is given a descriptive
  # jobname; organism and watson are taken from the study only when the
  # collection does not already have them set.
  property :all_mutations do
    mutations = cohort.metagenotype.sort
    mutations.jobname = "All mutations in #{ self }"
    mutations.organism ||= organism
    mutations.watson ||= watson
    mutations
  end

  # Subset of all_mutations selected by their :relevant? property.
  property :relevant_mutations do
    relevant = self.all_mutations.select_by(:relevant?)
    relevant.jobname = "Relevant mutations in #{ self }"
    relevant
  end

  # Subset of relevant_mutations selected by :damaging?; any extra arguments
  # are forwarded to that property.
  property :damaging_mutations do |*args|
    damaging = relevant_mutations.select_by(:damaging?, *args)
    damaging.jobname = "Damaging mutations in #{ self }"
    damaging
  end

  # Relevant mutations having at least one mutated isoform whose consequence
  # is not "SYNONYMOUS". Mutations without mutated isoforms are excluded.
  property :mutations_altering_isoform_sequence do
    altering = relevant_mutations.select do |mutation|
      isoforms = mutation.mutated_isoforms
      !isoforms.nil? && isoforms.select { |isoform| isoform.consequence != "SYNONYMOUS" }.any?
    end
    altering.jobname = "Mutations altering isoform sequence in #{ self }"
    altering
  end

  # Relevant mutations with a non-empty list of transcripts with affected splicing.
  property :mutations_affecting_splicing_sites do
    affecting = relevant_mutations.select_by(:transcripts_with_affected_splicing) { |transcripts| transcripts.any? }
    affecting.jobname = "Mutations affecting splicing sites in #{ self }"
    affecting
  end

  # Mutations (from all_mutations) whose gene list includes +gene+.
  property :mutations_over_gene do |gene|
    all_mutations.select_by(:genes) { |genes| genes && genes.include?(gene) }
  end

  # Mutations (from all_mutations) whose gene list shares any entry with +list+.
  property :mutations_over_gene_list do |list|
    all_mutations.select_by(:genes) { |genes| genes && (genes & list).any? }
  end
end
@@ -1,28 +0,0 @@
# Genotype-related properties of a Sample (or list of samples) in a study.
module Sample
  # For each sample in the list, true when the study cohort holds a
  # non-nil entry for it.
  property :has_genotype? => :array2single do
    genotypes = study.cohort.values_at(*self)
    genotypes.collect { |genotype| !genotype.nil? }
  end

  # The cohort entry for this sample. Study.setup is applied to the study
  # before the cohort is read.
  property :mutations do
    Study.setup(study)
    study.cohort[self]
  end

  # This sample's mutations selected by their :relevant? property.
  property :relevant_mutations do
    mutations.select_by(:relevant?)
  end

  # This sample's mutations selected by :damaging?; extra arguments are
  # forwarded to that property.
  property :damaging_mutations do |*args|
    mutations.select_by(:damaging?, *args)
  end

  # Unique, non-nil genes affected by any of this sample's mutations.
  property :affected_genes do
    per_mutation = mutations.affected_genes
    per_mutation.compact.flatten.uniq
  end

  # Unique, non-nil genes damaged by any of this sample's mutations; extra
  # arguments are forwarded to damaged_genes.
  property :damaged_genes do |*args|
    per_mutation = mutations.damaged_genes(*args)
    per_mutation.compact.flatten.uniq
  end
end
27
-
28
-
@@ -1,35 +0,0 @@
# Knowledge-base support for studies: a module-level knowledge base and
# registry, plus a lazily built per-study knowledge base.
module Study

  class << self
    # Only writers are generated here: the readers below supply lazy defaults,
    # so generating readers as well would just define methods that are
    # immediately redefined.
    attr_writer :knowledge_base, :study_registry

    # Module-wide knowledge base, created on first access.
    def knowledge_base
      @knowledge_base ||= KnowledgeBase.new Rbbt.var.knowledge_base.Study
    end

    # Registry of database name => file (or Proc) that every per-study
    # knowledge base registers when it is first built. Defaults to an empty hash.
    def study_registry
      @study_registry ||= {}
    end
  end

  attr_writer :knowledge_base

  # Per-study knowledge base, built on first access and memoized.
  #
  # Every entry of Study.study_registry is registered on the new knowledge
  # base. Proc entries are wrapped in a block that calls them with this study
  # and the database name; other entries are registered as given.
  def knowledge_base
    @knowledge_base ||= begin
      kb = KnowledgeBase.new(Rbbt.var.knowledge_base.studies[self], self.organism)
      kb.format["Gene"] = "Ensembl Gene ID"
      kb.entity_options["Sample"] = {"Study" => self}

      # Capture the study explicitly so the registered block passes it to the
      # registry proc regardless of the context the block is later run in.
      study = self

      Study.study_registry.each do |database, file|
        Log.debug("Inheriting #{ database } from registry: #{Misc.fingerprint file}")
        if Proc === file
          block = Proc.new{ file.call(study, database) }
          kb.register database, nil, {}, &block
        else
          kb.register database, file
        end
      end

      kb
    end
  end
end
35
-
@@ -1,90 +0,0 @@
1
- require 'rbbt/entity/methylation'
2
-
3
- require 'rbbt/entity/study/methylation/samples'
4
-
# Helpers available to study workflow tasks.
module StudyWorkflow
  # Value stored under :organism in the study's metadata.
  helper(:organism) { study.metadata[:organism] }
end
10
-
# Access to the methylation data stored under a study's directory.
module Study
  # True when the study directory has a methylation entry.
  def has_methylation?
    dir.methylation.exists?
  end

  # Every path matched by "*" inside the located methylation directory.
  def methylation_files
    dir.methylation.find.glob("*")
  end

  # Hash of sample name => sorted methylation entries, one sample per file.
  #
  # The sample name is the file's basename, set up as a Sample of this study;
  # the entries are the file's lines, sorted and set up as Methylation with
  # the study organism.
  #
  # The hash is assembled locally and memoized only once every file has been
  # loaded, so an error while reading one file cannot leave a partially
  # filled cache behind for later calls.
  def methylation_cohort
    @methylation_cohort ||= begin
      loaded = {}
      methylation_files.each do |f|
        sample = File.basename(f)
        Sample.setup(sample, self)
        methylations = Open.read(f).split("\n").sort
        Methylation.setup(methylations, organism)
        loaded[sample] = methylations
      end
      loaded
    end
  end
end
34
-
# Methylation-derived properties of a Study.
module Study
  # Genes reported by `lost_genes` in at least +threshold+ samples that have
  # methylation data. Returned as Ensembl Gene ID entities for the study organism.
  # NOTE(review): `lost_genes` is not defined in this file — confirm it is the
  # intended per-sample property for methylation.
  property :recurrently_lost_genes => :single do |threshold|
    counts = Hash.new(0)
    self.samples.each do |sample|
      next unless sample.has_methylation?

      sample.lost_genes.clean_annotations.each do |gene|
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|gene, count| count >= threshold }.collect{|gene, count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # Genes reported by `gained_genes` in at least +threshold+ samples that have
  # methylation data. Returned as Ensembl Gene ID entities for the study organism.
  # NOTE(review): `gained_genes` is not defined in this file — confirm it is
  # the intended per-sample property for methylation.
  property :recurrently_gained_genes => :single do |threshold|
    counts = Hash.new(0)
    self.samples.each do |sample|
      next unless sample.has_methylation?

      sample.gained_genes.clean_annotations.each do |gene|
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|gene, count| count >= threshold }.collect{|gene, count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # TSV keyed by Ensembl Gene ID with one "TRUE"/"FALSE" column per sample of
  # the methylation cohort: "TRUE" when the gene appears among the genes of
  # that sample's methylation entries.
  property :gene_sample_methylation_matrix => :single do
    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)

    # Rows must be as wide as the column list, and the columns are the
    # methylation cohort samples collected below.
    num_samples = methylation_cohort.length
    samples = []

    methylation_cohort.each do |sample, methylation|
      column = samples.length
      methylation.genes.compact.flatten.uniq.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][column] = "TRUE"
      end
      samples << sample
    end

    tsv.fields = samples

    tsv
  end
end
@@ -1,31 +0,0 @@
# Methylation-related properties of a Sample (or list of samples) in a study.
module Sample
  # The study's methylation cohort hash, or an empty hash when the study has
  # no methylation data.
  # NOTE(review): presumably the :array2single machinery picks each sample's
  # entry out of this hash — confirm against the property implementation.
  property(:methylation => :array2single) do
    if study.has_methylation?
      study.methylation_cohort
    else
      {}
    end
  end

  # For each sample in the list, true when the methylation cohort holds a
  # non-nil entry for it; all false when the study has no methylation data.
  property(:has_methylation? => :array2single) do
    if study.has_methylation?
      entries = study.methylation_cohort.values_at(*self)
      entries.collect { |entry| !entry.nil? }
    else
      [false] * self.length
    end
  end

  # Methylation entries of this sample selected by :methylated?, or an empty
  # array when the sample's methylation is empty.
  property(:methylated => :single) do
    return [] if methylation.empty?

    methylation.select_by(:methylated?)
  end

  # Methylation entries of this sample selected by :unmethylated?, or an empty
  # array when the sample's methylation is empty.
  property(:unmethylated => :single) do
    return [] if methylation.empty?

    methylation.select_by(:unmethylated?)
  end

  # Unique genes of the methylated entries, as Ensembl Gene ID entities;
  # an empty array when there are no methylated entries.
  property(:methylated_genes => :single) do
    return [] if methylated.empty?

    gene_ids = methylated.genes.flatten.uniq
    Gene.setup(gene_ids, "Ensembl Gene ID", organism)
  end

  # Unique genes of the unmethylated entries, as Ensembl Gene ID entities;
  # an empty array when there are no unmethylated entries.
  property(:unmethylated_genes => :single) do
    return [] if unmethylated.empty?

    gene_ids = unmethylated.genes.flatten.uniq
    Gene.setup(gene_ids, "Ensembl Gene ID", organism)
  end
end
30
-
31
-
@@ -1,259 +0,0 @@
# Groups every mutation of the cohort by its base change, keyed as
# "<reference>><alternatives>" where the alternatives are the bases encoded by
# the mutation's IUPAC base minus the reference, joined with ",".
# The reference is the mutation's own reference when watson is set, otherwise
# its gene-strand reference. Returns a flat TSV of change => mutations.
task :mutations_by_change => :tsv do
  grouped = {}

  study.cohort.each do |genotype|
    genotype.watson ||= watson

    genotype.each do |mutation|
      reference = if watson
                    mutation.reference
                  else
                    mutation.gene_strand_reference
                  end
      alternatives = (Misc::IUPAC2BASE[mutation.base] || []) - [reference]
      key = [reference, alternatives * ","] * ">"

      (grouped[key] ||= []) << mutation.clean_annotations
    end
  end

  TSV.setup(grouped, :key_field => "Genomic Change", :fields => ["Genomic Mutation"], :namespace => organism, :type => :flat)
  grouped.entity_options = {:watson => watson}

  grouped
end
22
-
23
-
24
- dep :mutations_by_change
25
- task :mutation_change_counts => :yaml do
26
- change_counts = {}
27
-
28
- step(:mutations_by_change).load.each do |change, mutations|
29
- change_counts[change] = mutations.length
30
- end
31
-
32
- change_counts
33
- end
34
-
# Mutations across all genotypes of the cohort whose type is "transversion".
returns "Genomic Mutation"
task :transversions => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select { |mutation| mutation.type == "transversion" }
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: transversions", organism, watson)
end
51
-
# Mutations across all genotypes of the cohort whose type is "transition".
returns "Genomic Mutation"
task :transitions => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select { |mutation| mutation.type == "transition" }
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: transitions", organism, watson)
end
68
-
# Mutations across all genotypes of the cohort whose type is "indel".
returns "Genomic Mutation"
task :indels => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select { |mutation| mutation.type == "indel" }
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: indels", organism, watson)
end
84
-
# Mutations across all genotypes of the cohort whose type is "unknown".
returns "Genomic Mutation"
task :unknown_mutations => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select { |mutation| mutation.type == "unknown" }
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: unknown_mutations", organism, watson)
end
100
-
101
-
# Entries across all genotypes of the cohort whose type is "none".
returns "Genomic Mutation"
task :not_mutations => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select { |mutation| mutation.type == "none" }
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: not mutations", organism, watson)
end
117
-
118
-
119
-
# Mutations across the cohort with at least one mutated isoform flagged as
# non_synonymous. Mutations without mutated isoforms are left out.
returns "Genomic Mutation"
task :non_synonymous_mutations => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select do |mutation|
      isoforms = mutation.mutated_isoforms || []
      isoforms.select { |isoform| isoform.non_synonymous }.any?
    end
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: non_synonymous mutations", organism, watson)
end
135
-
# Everything left in each genotype after removing the mutations reported by
# :non_synonymous_mutations.
dep :non_synonymous_mutations
returns "Genomic Mutation"
task :synonymous_mutations => :annotations do
  excluded = step(:non_synonymous_mutations).load

  remaining = study.cohort.collect { |genotype| genotype.remove(excluded) }

  GenomicMutation.setup(remaining.flatten, "#{ study }: synonymous mutations", organism, watson)
end
149
-
150
- #dep :synonymous_mutations
151
- #dep :exon_junction_mutations
152
- #input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
153
- #returns "Genomic Mutation"
154
- #task :damaging_mutations => :annotations do |methods|
155
- # synonymous_mutations = step(:synonymous_mutations).load
156
- # exon_junction_mutations = step(:exon_junction_mutations).load
157
- #
158
- # mutations_to_remove = synonymous_mutations - exon_junction_mutations
159
- #
160
- # mutations = study.cohort.collect{|genotype|
161
- #
162
- # genotype.remove( mutations_to_remove ).select{|mutation| mutation.damaging?(methods) }
163
- #
164
- # }.flatten
165
- #
166
- # GenomicMutation.setup(mutations, "#{ study }: damaging mutations", organism, watson)
167
- #end
168
-
# Relevant mutations that answer true to damaging? for the given prediction
# methods (default: sift and mutation_assessor).
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
returns "Genomic Mutation"
task :damaging_mutations => :annotations do |methods|
  candidates = step(:relevant_mutations ).load

  damaging = candidates.select { |mutation| mutation.damaging?(methods) }

  GenomicMutation.setup(damaging, "#{ study }: damaging mutations", organism, watson)
end
179
-
180
-
# Relevant, non-damaging mutations that have mutated isoforms, all of which
# lack damage scores.
# NOTE(review): the `methods` input is declared and received but never read
# in this body — confirm whether it is meant to influence the result.
dep :damaging_mutations
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift]
returns "Genomic Mutation"
task :mutations_missing_predictions => :annotations do |methods|
  damaging = step(:damaging_mutations).load
  relevant = step(:relevant_mutations).load

  not_damaging = relevant.remove(damaging)

  # Isoforms of the non-damaging mutations for which no damage scores exist.
  candidate_isoforms = not_damaging.mutated_isoforms.compact.flatten
  unscored_isoforms = candidate_isoforms.select { |isoform| isoform.damage_scores.nil? }

  with_isoforms = not_damaging.select do |mutation|
    mutation.mutated_isoforms && mutation.mutated_isoforms.any?
  end

  # Keep a mutation only when nothing is left after dropping its unscored isoforms.
  unpredicted = with_isoforms.select do |mutation|
    mutation.mutated_isoforms.remove(unscored_isoforms).empty?
  end

  GenomicMutation.setup(unpredicted, "#{ study }: mutations missing predictions", organism, watson)
end
195
-
# Mutations across the cohort that affect splicing in at least one transcript
# and whose type is not "none".
returns "Genomic Mutation"
task :exon_junction_mutations => :annotations do
  per_genotype = study.cohort.collect do |genotype|
    genotype.select do |mutation|
      mutation.transcripts_with_affected_splicing.any? && !(mutation.type == "none")
    end
  end

  GenomicMutation.setup(per_genotype.flatten, "#{ study }: exon junction mutations", organism, watson)
end
207
-
# Exon junction mutations followed by the non-synonymous mutations that are
# not already among them.
dep :non_synonymous_mutations
dep :exon_junction_mutations
returns "Genomic Mutation"
task :relevant_mutations => :annotations do
  non_synonymous = step(:non_synonymous_mutations).load
  exon_junction = step(:exon_junction_mutations).load

  only_non_synonymous = non_synonymous.remove(exon_junction)
  combined = (exon_junction + only_non_synonymous).flatten

  GenomicMutation.setup(combined, "#{ study }: relevant mutations", organism, watson)
end
219
-
# Relevant mutations (with scores removed) that are counted more than once.
dep :relevant_mutations
returns "Genomic Mutation"
task :recurrent_mutations => :annotations do
  relevant = step(:relevant_mutations).load

  occurrences = Misc.counts(relevant.remove_score)
  repeated = occurrences.select { |mutation, count| count > 1 }
  recurrent = repeated.collect { |mutation, count| mutation }

  GenomicMutation.setup(recurrent, "#{study}: recurrent mutations", organism, watson)
end
233
-
# Hash of consequence => mutations, built from the non-synonymous mutations of
# every genotype. A mutation is listed under each distinct, non-nil consequence
# of its mutated isoforms; mutations without mutated isoforms are skipped.
dep :non_synonymous_mutations
task :mutations_by_consequence => :yaml do
  non_synonymous = step(:non_synonymous_mutations).load

  by_consequence = {}

  study.cohort.each do |genotype|
    genotype.subset(non_synonymous).each do |mutation|
      isoforms = mutation.mutated_isoforms
      next if isoforms.nil?

      isoforms.consequence.compact.uniq.each do |consequence|
        (by_consequence[consequence] ||= []) << mutation
      end
    end
  end

  by_consequence
end
# Defines one task per consequence type. Each task returns the mutations that
# :mutations_by_consequence lists under the paired consequence label, or an
# empty list when the label is absent.
consequence_task_names = %w(missense_mutations nonsense_mutations frameshift_mutations nostop_mutations indel_mutations utr_mutations)
consequence_labels = %w(MISS-SENSE NONSENSE FRAMESHIFT NOSTOP INDEL UTR)

consequence_task_names.zip(consequence_labels).each do |task_name, consequence|
  dep :mutations_by_consequence
  returns "Genomic Mutation"
  task task_name => :annotations do
    by_consequence = step(:mutations_by_consequence).load
    selected = by_consequence[consequence] || []

    GenomicMutation.setup(selected, "#{study}: mutations with #{consequence.downcase} isoform mutations", organism, watson)
  end
end
259
-