rbbt-study 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,103 @@
1
# Gene-level properties of a Study, derived from the mutations found in its
# genotype cohort.
module Study
  # Genes reported by the cohort metagenotype for its mutations, with nil
  # entries removed and duplicates collapsed.
  property :genes_with_overlapping_mutations => :single do
    cohort.metagenotype.genes.compact.flatten.uniq
  end

  # Mutated isoforms of the relevant mutations whose consequence is anything
  # other than "SYNONYMOUS". Returns a plain empty array when there are none.
  property :altered_isoforms => :single do
    isoforms = cohort.metagenotype.subset(relevant_mutations).mutated_isoforms.compact.flatten.uniq
    return [] if isoforms.empty?
    isoforms.select_by(:consequence){|c| c != "SYNONYMOUS"}
  end

  # Genes of the transcripts behind the altered isoforms.
  property :genes_with_altered_isoform_sequence => :single do
    isoforms = altered_isoforms
    return [] if isoforms.empty?
    isoforms.transcript.compact.gene.uniq
  end

  # Altered isoforms that answer true to :damaged?; any extra arguments are
  # forwarded to that check.
  property :damaged_isoforms => :single do |*args|
    isoforms = altered_isoforms
    return [] if isoforms.empty?
    isoforms.select_by(:damaged?, *args)
  end

  # Genes of the transcripts behind the damaged isoforms.
  property :genes_with_damaged_isoforms => :single do |*args|
    isoforms = damaged_isoforms(*args)
    return [] if isoforms.empty?
    isoforms.transcript.gene.uniq
  end

  # Genes of the transcripts whose splicing is affected by relevant mutations.
  property :genes_with_affected_splicing_sites => :single do
    cohort.metagenotype.subset(relevant_mutations).transcripts_with_affected_splicing.compact.flatten.uniq.gene.compact.uniq
  end

  # Union of genes with an altered isoform sequence and genes with affected
  # splicing sites, set up as "Ensembl Gene ID" genes.
  property :affected_genes => :single do
    # Deduplicate before the setup, exactly as damaged_genes does, so both
    # properties build their result the same way.
    Gene.setup((genes_with_altered_isoform_sequence + genes_with_affected_splicing_sites).uniq, "Ensembl Gene ID", organism)
  end

  # Union of genes with damaged isoforms and genes with affected splicing
  # sites, set up as "Ensembl Gene ID" genes.
  property :damaged_genes => :single do |*args|
    Gene.setup((genes_with_damaged_isoforms(*args) + genes_with_affected_splicing_sites).uniq, "Ensembl Gene ID", organism)
  end

  # Hash of gene => names (jobname) of the genotypes in which a damaging
  # mutation damages an isoform of the gene or affects its splicing.
  # Each genotype is listed at most once per gene.
  property :samples_with_gene_damaged => :single do
    damaging = self.damaging_mutations

    samples_by_gene = {}
    cohort.each do |genotype|
      genotype.each do |mutation|
        next unless damaging.include? mutation
        genes = []
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:damaged?).transcript.gene unless mis.nil? || mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
        genes.uniq.each do |gene|
          samples = (samples_by_gene[gene] ||= [])
          # Several mutations of one genotype can hit the same gene; record
          # the genotype only once so list lengths count distinct samples.
          samples << genotype.jobname unless samples.include? genotype.jobname
        end
      end
    end
    samples_by_gene
  end

  # Hash of gene => names (jobname) of the genotypes in which a relevant
  # mutation produces a non-synonymous isoform of the gene or affects its
  # splicing. Each genotype is listed at most once per gene.
  property :samples_with_gene_affected => :single do
    relevant = self.relevant_mutations

    samples_by_gene = {}
    cohort.each do |genotype|
      genotype.each do |mutation|
        next unless relevant.include? mutation
        genes = []
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:consequence){|c| c != "SYNONYMOUS"}.transcript.gene unless mis.nil? || mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
        genes.uniq.each do |gene|
          samples = (samples_by_gene[gene] ||= [])
          # See samples_with_gene_damaged: one entry per genotype and gene.
          samples << genotype.jobname unless samples.include? genotype.jobname
        end
      end
    end
    samples_by_gene
  end

  # TSV keyed by "Ensembl Gene ID" with one field per genotyped sample.
  # A cell is "TRUE" when the gene is among the sample's affected genes and
  # "FALSE" otherwise; genes affected in no sample get no row.
  property :gene_sample_matrix => :single do
    genotyped_samples = samples.select{|s| s.has_genotype?}.sort.uniq

    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list, :fields => genotyped_samples)

    num_samples = genotyped_samples.length
    genotyped_samples.each_with_index do |sample,i|
      affected = sample.affected_genes
      next if affected.empty?
      affected.clean_annotations.each do |gene|
        # Rows are created lazily, all "FALSE", the first time a gene shows up.
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][i] = "TRUE"
      end
    end

    tsv.fields = genotyped_samples

    tsv
  end

  # Genes affected in at least `min` samples; `min` is the first argument
  # and defaults to 2 when omitted or nil.
  property :recurrent_genes => :single do |*args|
    min = args.first
    min = 2 if min.nil?

    # Count distinct samples, so a sample carrying several mutations in the
    # same gene cannot make that gene look recurrent on its own.
    recurrent = samples_with_gene_affected.select{|gene, samples| samples.uniq.length >= min }.collect{|gene, samples| gene}
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end
end
@@ -0,0 +1,39 @@
1
# Registers the study-level databases in Study.study_registry.
# Each entry is a Proc called with (study, database) that returns a TSV.
module Study

  # Genomic Mutation => genes reported for that mutation (flat TSV).
  self.study_registry[:mutation_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.cohort.metagenotype.uniq.each do |mutation|
      tsv[mutation] = mutation.genes
    end
    tsv
  }

  # Sample => mutations of the sample (flat TSV); only samples that have a
  # genotype are included.
  self.study_registry[:sample_mutations] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Genomic Mutation"], :type => :flat, :namespace => study.organism)
    study.samples.select_by(:has_genotype?).each do |sample|
      tsv[sample] = sample.mutations
    end
    tsv
  }

  # Sample => affected genes of the sample (flat TSV); only samples that
  # have a genotype are included.
  self.study_registry[:sample_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.samples.select_by(:has_genotype?).each do |sample|
      tsv[sample] = sample.affected_genes
    end
    tsv
  }

  # Sample => [affected genes, mutations per gene] (double TSV). The
  # mutations of each gene are looked up in the :mutation_genes database
  # keyed by "Ensembl Gene ID" and joined with ";;".
  self.study_registry[:sample_genes2] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "Genomic Mutation"], :type => :double, :namespace => study.organism)
    kb = study.knowledge_base.get_database(:mutation_genes, :source => "Ensembl Gene ID")
    study.samples.select_by(:has_genotype?).each do |sample|
      values = sample.affected_genes.collect do |gene|
        # An affected gene may have no entry in the mutation database;
        # report an empty mutation list instead of failing on nil.
        [gene, (kb[gene] || []) * ";;"]
      end
      tsv[sample] = Misc.zip_fields values
    end
    tsv
  }

end
@@ -0,0 +1,34 @@
1
# Mutation-level properties of a Study. Results that are given a jobname
# carry a description that includes the study itself.
module Study
  # The cohort metagenotype, labelled as the full set of mutations.
  property :all_mutations do
    mutations = cohort.metagenotype
    mutations.jobname = "All mutations in #{ self }"
    mutations
  end

  # Mutations of the study that answer true to :relevant?.
  property :relevant_mutations do
    relevant = all_mutations.select_by(:relevant?)
    relevant.jobname = "Relevant mutations in #{ self }"
    relevant
  end

  # Relevant mutations that answer true to :damaging?; extra arguments are
  # forwarded to that check.
  property :damaging_mutations do |*args|
    damaging = relevant_mutations.select_by(:damaging?, *args)
    damaging.jobname = "Damaging mutations in #{ self }"
    damaging
  end

  # Relevant mutations with at least one mutated isoform whose consequence
  # is not "SYNONYMOUS".
  property :mutations_altering_isoform_sequence do
    altering = relevant_mutations.select do |mutation|
      isoforms = mutation.mutated_isoforms
      !isoforms.nil? && isoforms.select{|isoform| isoform.consequence != "SYNONYMOUS"}.any?
    end
    altering.jobname = "Mutations altering isoform sequence in #{ self }"
    altering
  end

  # Relevant mutations with at least one transcript whose splicing is
  # affected.
  property :mutations_affecting_splicing_sites do
    affecting = relevant_mutations.select_by(:transcripts_with_affected_splicing){|ts| ts.any? }
    affecting.jobname = "Mutations affecting splicing sites in #{ self }"
    affecting
  end

  # Mutations whose gene list includes `gene`.
  property :mutations_over_gene do |gene|
    all_mutations.select_by(:genes){|genes| genes && genes.include?(gene)}
  end

  # Mutations whose gene list shares at least one gene with `list`.
  property :mutations_over_gene_list do |list|
    all_mutations.select_by(:genes){|genes| genes && (genes & list).any?}
  end
end
@@ -0,0 +1,28 @@
1
# Genotype-related properties of a Sample, resolved through the cohort of
# the study the sample belongs to.
module Sample
  # For each sample, whether the study cohort holds an entry for it.
  property :has_genotype? => :array2single do
    genotypes = study.cohort.values_at(*self)
    genotypes.collect{|genotype| !genotype.nil?}
  end

  # The cohort entry for this sample.
  property :mutations do
    Study.setup(study)
    study.cohort[self]
  end

  # Mutations of the sample that answer true to :relevant?.
  property :relevant_mutations do
    mutations.select_by(:relevant?)
  end

  # Mutations of the sample that answer true to :damaging?; extra arguments
  # are forwarded to that check.
  property :damaging_mutations do |*args|
    mutations.select_by(:damaging?, *args)
  end

  # Distinct genes affected by the mutations of the sample.
  property :affected_genes do
    genes = mutations.affected_genes
    genes.compact.flatten.uniq
  end

  # Distinct genes damaged by the mutations of the sample; extra arguments
  # are forwarded to the mutations' damaged_genes.
  property :damaged_genes do |*args|
    genes = mutations.damaged_genes(*args)
    genes.compact.flatten.uniq
  end
end
27
+
28
+
@@ -0,0 +1,110 @@
1
+ require 'rbbt/entity/genotype'
2
+
3
+ require 'rbbt/entity/study/genotypes/samples'
4
+ require 'rbbt/entity/study/genotypes/mutations'
5
+ require 'rbbt/entity/study/genotypes/genes'
6
+ require 'rbbt/entity/study/genotypes/enrichment'
7
+ require 'rbbt/entity/study/genotypes/knowledge_base'
8
+
9
+ Workflow.require_workflow "NKIWorkflow"
10
+ Workflow.require_workflow "TSVWorkflow"
11
+
12
# Workflow tasks summarising the genotype information of a study.
module StudyWorkflow
  # Organism recorded in the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  # Builds a double TSV keyed by "Ensembl Gene ID". For every gene reported
  # for some mutation it lists the samples where the gene is mutated, the
  # samples where it is affected, the samples where it is damaged, and the
  # "p.value" of the gene in the significance analysis ("> 0.1" when the
  # gene is absent from that analysis). Returns the empty TSV when the
  # study has no mutations.
  task :genotype_overview => :tsv do
    gene_overview = TSV.setup({},
                              :key_field => "Ensembl Gene ID",
                              :fields => ["Samples with gene mutated", "Samples with gene affected", "Samples with gene damaged", "Mutation significance"],
                              :type => :double
                             )
    genotyped_samples = study.samples.select_by(:has_genotype?)
    all_mutations = study.all_mutations
    if all_mutations.empty?
      gene_overview
    else

      log :affected_genes, "Computing how genes are affected by mutations"
      mutation_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.genes}
      mutation_affected_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.affected_genes}
      if all_mutations.length < 5000
        log :damaged_genes, "Computing genes damaged genes"
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.damaged_genes}
      else
        # With 5000 mutations or more the damage computation is skipped and
        # every mutation is mapped to nil, which leaves the "damaged" column
        # empty for all genes.
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|mutations| [nil] * mutations.length}
      end
      log :significance, "Computing mutation significance"
      mutation_significance = NKIWorkflow.job(:significantly_mutated, study, :study => study, :threshold => 0.1).run
      log :significance, "Reordering mutation significance file"

      #TSVWorkflow.job(:change_id, study, :format => "Ensembl Gene ID", :tsv => mutation_significance).run
      mutation_significance.identifiers = Organism.identifiers(study.organism)
      mutation_significance = mutation_significance.change_key "Ensembl Gene ID"

      log :samples, "Gathering affected samples"
      # sample => gene => [mutated, affected, damaged]
      samples_gene_status = {}
      # The position of each hash is the position of its flag in the status.
      status_sources = [mutation_genes, mutation_affected_genes, mutation_damaged_genes]
      genotyped_samples.each do |sample|
        gene_status = samples_gene_status[sample] = {}
        sample_mutations = sample.mutations

        status_sources.each_with_index do |genes_by_mutation, column|
          genes_by_mutation.values_at(*sample_mutations).each do |genes|
            # A mutation may have no gene list in any of the three hashes,
            # not only in the damaged one; skip it instead of failing.
            next if genes.nil?
            genes.each do |gene|
              gene_status[gene] ||= [false, false, false]
              gene_status[gene][column] = true
            end
          end
        end
      end

      log :compiling, "Compiling result"
      mutation_genes.values.compact.flatten.uniq.each do |gene|
        # One list of samples per status flag, in field order.
        row = (0..2).collect do |column|
          samples_gene_status.select{|sample, gene_status| gene_status.include?(gene) && gene_status[gene][column]}.collect{|sample, gene_status| sample}
        end
        row << [mutation_significance.include?(gene) ? mutation_significance[gene]["p.value"] : "> 0.1"]
        gene_overview[gene] = row
      end

      gene_overview
    end
  end
end
87
+
88
# Access to the genotype files of a study and to the cohort built from them.
module Study
  # Whether the study directory has a genotypes entry.
  def has_genotypes?
    dir.genotypes.exists?
  end

  # Only the writer is generated: the reader is defined right below, and
  # generating it as well would just be redefined (with a warning).
  attr_writer :watson

  # Value assigned through #watson=, falling back to metadata[:watson].
  # The nil check (rather than ||=) keeps an explicit false from being
  # replaced by the metadata value on later calls.
  def watson
    @watson = metadata[:watson] if @watson.nil?
    @watson
  end

  # Every entry found under the genotypes directory of the study.
  def genotype_files
    dir.genotypes.glob("*")
  end

  # One GenomicMutation list per genotype file, set up with the file
  # basename, the study organism and the watson setting. Each list holds the
  # sorted lines of its file. The resulting array is extended with
  # Genotype::Cohort and memoized.
  def cohort
    @cohort ||= begin
      genotypes = genotype_files.collect do |file|
        name = File.basename(file)
        mutations = Open.read(file).split("\n").sort
        GenomicMutation.setup(mutations, name, organism, watson)
      end
      genotypes.extend Genotype::Cohort
      genotypes
    end
  end
end
@@ -0,0 +1,36 @@
1
# Knowledge base support for studies: a registry of databases shared by all
# studies, plus a knowledge base per study that inherits that registry.
module Study

  class << self
    # Only writers are generated; the readers below supply lazy defaults.
    attr_writer :knowledge_base, :study_registry

    # Knowledge base shared at the Study level, created on first use.
    def knowledge_base
      @knowledge_base ||= KnowledgeBase.new Rbbt.var.knowledge_base.Study
    end

    # Hash of database name => file, or Proc called with (study, database).
    def study_registry
      @study_registry ||= {}
    end
  end

  # Only the writer is generated; the reader below builds the default.
  attr_writer :knowledge_base

  # Knowledge base of this study, created on first use under the study
  # directory and populated with every database in Study.study_registry.
  def knowledge_base
    @knowledge_base ||= begin
      # Captured in a local so the blocks below keep referring to the study
      # no matter which object they are later evaluated against.
      study = self
      kb = KnowledgeBase.new(dir.var.knowledge_base, organism)
      kb.format["Gene"] = "Ensembl Gene ID"
      kb.entity_options["Sample"] = {"Study" => study}
      Study.study_registry.each do |database, file|
        Log.debug("Inheriting #{ database } from registry: #{Misc.fingerprint file}")
        if Proc === file
          block = Proc.new{ file.call(study, database) }
          # The block reports a filename that combines the database and
          # the study.
          block.define_singleton_method(:filename) do [database, study] * "@" end
          kb.register database, nil, {}, &block
        else
          kb.register database, file
        end
      end
      kb
    end
  end
end
36
+
@@ -0,0 +1,31 @@
1
# Methylation-related properties of a Sample, resolved through the
# methylation cohort of the study the sample belongs to.
module Sample
  # Returns the study's methylation cohort.
  # NOTE(review): how the :array2single property maps this hash onto each
  # sample is decided by the property machinery, not visible here — confirm.
  property :methylation => :array2single do
    study.methylation_cohort
  end

  # For each sample, whether the methylation cohort holds an entry for it.
  property :has_methylation? => :array2single do
    entries = study.methylation_cohort.values_at(*self)
    entries.collect{|entry| !entry.nil?}
  end

  # Methylation entries that answer true to :methylated?; a plain empty
  # array when the sample has no methylation entries.
  property :methylated => :single do
    entries = methylation
    return [] if entries.empty?
    entries.select_by(:methylated?)
  end

  # Methylation entries that answer true to :unmethylated?; a plain empty
  # array when the sample has no methylation entries.
  property :unmethylated => :single do
    entries = methylation
    return [] if entries.empty?
    entries.select_by(:unmethylated?)
  end

  # Distinct genes of the methylated entries, as "Ensembl Gene ID" genes.
  property :methylated_genes => :single do
    entries = methylated
    return [] if entries.empty?
    Gene.setup(entries.genes.flatten.uniq, "Ensembl Gene ID", organism)
  end

  # Distinct genes of the unmethylated entries, as "Ensembl Gene ID" genes.
  property :unmethylated_genes => :single do
    entries = unmethylated
    return [] if entries.empty?
    Gene.setup(entries.genes.flatten.uniq, "Ensembl Gene ID", organism)
  end
end
30
+
31
+
@@ -0,0 +1,90 @@
1
+ require 'rbbt/entity/methylation'
2
+
3
+ require 'rbbt/entity/study/methylation/samples'
4
+
5
# Workflow helpers shared by the methylation tasks.
module StudyWorkflow
  # Organism recorded in the study metadata.
  helper(:organism) { study.metadata[:organism] }
end
10
+
11
# Access to the methylation files of a study and to the cohort built from
# them.
module Study
  # Whether the study directory has a methylation entry.
  def has_methylation?
    dir.methylation.exists?
  end

  # Every entry found under the located methylation directory of the study.
  def methylation_files
    dir.methylation.find.glob("*")
  end

  # Hash of sample name (file basename) => Methylation list holding the
  # sorted lines of the file. Built on first use and memoized.
  def methylation_cohort
    return @methylation_cohort unless @methylation_cohort.nil?

    # The hash is assigned before it is filled, as in the original
    # statement order.
    @methylation_cohort = {}
    methylation_files.each do |file|
      name = File.basename(file)
      Sample.setup(name, self)
      entries = Open.read(file).split("\n").sort
      Methylation.setup(entries, organism)
      @methylation_cohort[name] = entries
    end
    @methylation_cohort
  end
end
34
+
35
# Study-level properties summarising methylation across samples.
module Study
  # Genes present in the lost_genes of at least `threshold` samples that
  # have methylation data, as "Ensembl Gene ID" genes.
  # NOTE(review): Sample#lost_genes is not defined in the methylation sample
  # properties shown alongside this module — confirm where it comes from.
  property :recurrently_lost_genes => :single do |threshold|
    counts = Hash.new(0)
    self.samples.each do |sample|
      next unless sample.has_methylation?
      # Progress goes to the log rather than to standard output.
      Log.debug("Counting lost genes for sample #{ sample }")

      sample.lost_genes.clean_annotations.each do |gene|
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|gene, count| count >= threshold }.collect{|gene, count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # Genes present in the gained_genes of at least `threshold` samples that
  # have methylation data, as "Ensembl Gene ID" genes.
  # NOTE(review): Sample#gained_genes is not defined in the methylation
  # sample properties shown alongside this module — confirm where it comes
  # from.
  property :recurrently_gained_genes => :single do |threshold|
    counts = Hash.new(0)
    self.samples.each do |sample|
      next unless sample.has_methylation?
      # Progress goes to the log rather than to standard output.
      Log.debug("Counting gained genes for sample #{ sample }")

      sample.gained_genes.clean_annotations.each do |gene|
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|gene, count| count >= threshold }.collect{|gene, count| gene }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # TSV keyed by "Ensembl Gene ID" with one field per sample of the
  # methylation cohort. A cell is "TRUE" when the gene appears among the
  # genes of that sample's methylation entries and "FALSE" otherwise.
  property :gene_sample_methylation_matrix => :single do
    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
    methylation_by_sample = methylation_cohort
    # Rows must be as wide as the methylation cohort, since that is what
    # supplies the columns; the genotype cohort may have a different size.
    num_samples = methylation_by_sample.length
    samples = []
    methylation_by_sample.each do |sample, methylation|
      # The column of a sample is its position in iteration order.
      column = samples.length
      methylation.genes.compact.flatten.uniq.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][column] = "TRUE"
      end
      samples << sample
    end

    tsv.fields = samples

    tsv
  end
end