rbbt-study 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- metadata +2 -24
- data/lib/rbbt/entity/study.rb +0 -172
- data/lib/rbbt/entity/study/cnv.rb +0 -170
- data/lib/rbbt/entity/study/cnv/genes.rb +0 -28
- data/lib/rbbt/entity/study/cnv/knowledge_base.rb +0 -39
- data/lib/rbbt/entity/study/cnv/samples.rb +0 -54
- data/lib/rbbt/entity/study/enrichment.rb +0 -418
- data/lib/rbbt/entity/study/expression.rb +0 -24
- data/lib/rbbt/entity/study/features.rb +0 -17
- data/lib/rbbt/entity/study/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes.rb +0 -134
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +0 -56
- data/lib/rbbt/entity/study/genotypes/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +0 -81
- data/lib/rbbt/entity/study/genotypes/mutations.rb +0 -34
- data/lib/rbbt/entity/study/genotypes/samples.rb +0 -28
- data/lib/rbbt/entity/study/knowledge_base.rb +0 -35
- data/lib/rbbt/entity/study/methylation.rb +0 -90
- data/lib/rbbt/entity/study/methylation/samples.rb +0 -31
- data/lib/rbbt/entity/study/mutations.rb +0 -259
- data/lib/rbbt/entity/study/plots.rb +0 -140
- data/lib/rbbt/entity/study/samples.rb +0 -78
- data/lib/rbbt/entity/study/snp.rb +0 -87
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# Workflow tasks that export the study's "gene_expression" matrix.
module StudyWorkflow
  # Organism value stored under :organism in the study metadata.
  helper(:organism) { study.metadata[:organism] }

  # Asks the "gene_expression" matrix (requested with "Ensembl Gene ID" and
  # the study organism) to produce its matrix file at the task path.
  # The block itself returns nil.
  task :matrix => :tsv do
    study.matrix("gene_expression", "Ensembl Gene ID", organism).matrix_file(path)
    nil
  end

  # Asks the same matrix to produce a barcode at the task path. The factor is
  # the first block argument when one is given, otherwise 2.
  # The block itself returns nil.
  task :expression_barcode => :tsv do |*args|
    barcode_factor = args.first || 2
    expression = study.matrix("gene_expression", "Ensembl Gene ID", organism)
    expression.barcode(path, barcode_factor)
    nil
  end
end
|
|
19
|
-
|
|
20
|
-
module Study
  # Reports whether the "gene_expression" entry under the study's matrices
  # directory exists.
  def has_expression?
    expression_entry = dir.matrices["gene_expression"]
    expression_entry.exists?
  end
end
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
# Builds a Sample x gene table of 0/1 flags: 1 when the gene appears among the
# genes loaded from :mutated_genes_per_sample for that sample, 0 otherwise.
# Column names are the gene names with a "_mut" suffix.
# NOTE(review): `list.name` assumes the input list responds to `name`
# (presumably a Gene entity list) -- confirm against callers.
dep :mutated_genes_per_sample
input :list, :array, "Gene list in Ensembl Gene ID"
task :gene_features => :tsv do |list|
  genes_by_sample = step(:mutated_genes_per_sample).load

  sample_names = study.cohort.fields
  column_names = list.name.collect { |gene_name| gene_name + "_mut" }
  features = TSV.setup({}, :key_field => "Sample", :fields => column_names)

  sample_names.each do |sample|
    # Samples missing from the dependency result get an all-zero row.
    mutated = genes_by_sample[sample] || []
    features[sample] = list.collect do |gene|
      if mutated.include?(gene)
        1
      else
        0
      end
    end
  end

  features
end
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
# Collects `genes` (nils dropped) from every genotype in the cohort and
# flattens them into a single Gene list.
# NON UNIQ: duplicates are kept.
returns "Ensembl Gene ID"
task :affected_genes => :annotations do
  genes_per_genotype = study.cohort.collect do |genotype|
    genotype.genes.compact
  end

  Gene.setup(genes_per_genotype.flatten, "Ensembl Gene ID", organism)
end
|
|
6
|
-
|
|
7
|
-
# For each relevant mutation, gathers the genes of transcripts with affected
# splicing (only when the mutation is in an exon junction) plus the genes of
# the proteins of its mutated isoforms.
# NON UNIQ: genes are unique per mutation, but the combined list is not.
dep :relevant_mutations
returns "Ensembl Gene ID"
task :relevant_genes => :annotations do
  mutations = step(:relevant_mutations).load

  genes_per_mutation = mutations.collect do |mutation|
    splicing_genes =
      if mutation.in_exon_junction?
        mutation.transcripts_with_affected_splicing.gene
      else
        []
      end

    isoforms = mutation.mutated_isoforms
    protein_genes = isoforms.nil? ? [] : isoforms.protein.gene.compact.uniq

    (splicing_genes + protein_genes).uniq
  end

  Gene.setup(genes_per_mutation.compact.flatten, "Ensembl Gene ID", organism)
end
|
|
19
|
-
|
|
20
|
-
# Genes hit by relevant mutations that either have a mutated isoform flagged
# by `damaged?(methods)` or, when add_exon_junction is set, sit in an exon
# junction with a mutation type other than 'none'.
# NON UNIQ: genes are unique per mutation, but the combined list is not.
dep :relevant_mutations
returns "Ensembl Gene ID"
input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
input :add_exon_junction, :boolean, "Add exon junction mutations", true
task :damaged_genes => :annotations do |methods, add_exon_junction|
  mutations = step(:relevant_mutations).load

  # Damage status is computed once for all isoforms and looked up per isoform.
  isoforms = mutations.mutated_isoforms.compact.flatten
  damaged_by_isoform = Misc.process_to_hash(isoforms) { |list| list.damaged?(methods) }

  genes_per_mutation = mutations.collect do |mutation|
    found = []

    if add_exon_junction && mutation.in_exon_junction? && mutation.type != 'none'
      found.concat mutation.transcripts_with_affected_splicing.gene
    end

    mutation_isoforms = mutation.mutated_isoforms
    unless mutation_isoforms.nil?
      damaged = mutation_isoforms.select { |isoform| damaged_by_isoform[isoform] }
      found.concat damaged.protein.gene.compact.uniq
    end

    found.uniq
  end

  Gene.setup(genes_per_mutation.compact.flatten, "Ensembl Gene ID", organism)
end
|
|
43
|
-
|
|
44
|
-
# Counts how many times each gene occurs in the :relevant_genes result.
# Yields an empty hash when that result has no elements.
dep :relevant_genes
task :gene_mutation_count => :yaml do
  genes = step(:relevant_genes).load
  genes.any? ? Misc.counts(genes.clean_annotations) : {}
end
|
|
53
|
-
|
|
54
|
-
# Genes whose mutation count is above 1 and above the given percentage of the
# cohort size.
# NOTE(review): both comparisons are strict, so a gene sitting exactly at the
# "minimum" percentage is excluded -- confirm this is intended.
# NON UNIQ (marker kept from the original source).
dep :gene_mutation_count
input :percentage, :float, "Minimum percentage of samples with the mutation", 0
returns "Ensembl Gene ID"
task :recurrent_genes => :annotations do |percentage|
  counts = step(:gene_mutation_count).load
  minimum = (study.cohort.length.to_f * percentage.to_f) / 100.0

  recurrent = counts.select { |_gene, count| count > 1 && count > minimum }
  genes = recurrent.collect { |gene, _count| gene }

  Gene.setup(genes, "Ensembl Gene ID", organism)
end
|
|
70
|
-
|
|
71
|
-
# Union (flattened, duplicates removed) of the :damaged_genes and
# :recurrent_genes results.
dep :damaged_genes
dep :recurrent_genes
returns "Ensembl Gene ID"
task :suspect_genes => :annotations do
  damaged = step(:damaged_genes).load
  recurrent = step(:recurrent_genes).load

  combined = (damaged + recurrent).flatten.uniq
  Gene.setup(combined, "Ensembl Gene ID", organism)
end
|
|
80
|
-
|
|
81
|
-
# Relevant mutations whose `genes` share at least one element with the
# :recurrent_genes result. Mutations without genes are dropped.
dep :relevant_mutations
dep :recurrent_genes
task :mutations_over_recurrent_genes => :annotations do
  mutations = step(:relevant_mutations).load
  recurrent = step(:recurrent_genes).load

  mutations.select do |mutation|
    next false unless mutation.genes
    (mutation.genes & recurrent).any?
  end
end
|
|
89
|
-
|
|
90
|
-
# Relevant mutations whose `genes` share at least one element with the
# :suspect_genes result. Mutations without genes are dropped.
dep :relevant_mutations
dep :suspect_genes
task :mutations_over_suspect_genes => :annotations do
  mutations = step(:relevant_mutations).load
  suspects = step(:suspect_genes).load

  mutations.select do |mutation|
    next false unless mutation.genes
    (mutation.genes & suspects).any?
  end
end
|
|
98
|
-
|
|
99
|
-
require 'rbbt/mutation/oncodriveFM'
|
|
100
|
-
# Runs OncodriveFM over the study cohort and tags the resulting TSV with the
# study organism as its namespace.
task :oncodriveFM => :tsv do
  OncodriveFM.process_cohort(study.cohort).tap do |result|
    result.namespace = organism
  end
end
|
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
require 'rbbt/entity/genotype'
|
|
2
|
-
|
|
3
|
-
require 'rbbt/entity/study/genotypes/samples'
|
|
4
|
-
require 'rbbt/entity/study/genotypes/mutations'
|
|
5
|
-
require 'rbbt/entity/study/genotypes/genes'
|
|
6
|
-
require 'rbbt/entity/study/genotypes/enrichment'
|
|
7
|
-
require 'rbbt/entity/study/genotypes/knowledge_base'
|
|
8
|
-
|
|
9
|
-
# Genotype-related workflow tasks for a study.
module StudyWorkflow
  # Organism value stored under :organism in the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  # Disabled task, kept for reference.
  #task :binomial_significance => :tsv do
  #
  #  tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :fields => ["Matches", "Bases", "Frequency", "p.value"], :namespace => organism)
  #
  #  matches = study.knowledge_base.get_index(:mutation_genes).keys
  #  genes = matches.collect{|m| m.partition("~").last}.uniq
  #  all_mutations = matches.collect{|m| m.partition("~").first}.uniq
  #
  #  total_bases = Gene.gene_list_exon_bases(genes)
  #  global_frequency = all_mutations.length.to_f / total_bases
  #
  #  gene2exon_size = Misc.process_to_hash(genes){|genes| genes.collect{|gene| Gene.gene_list_exon_bases([gene]) }}
  #
  #  genes.each do |gene|
  #    mutations = study.knowledge_base.parents(:mutation_genes, gene).target
  #    mutations = study.knowledge_base.subset(:sample_mutations, "Genomic Mutation" => mutations, "Sample" => :all).source
  #    next if mutations.empty?
  #    matches = mutations.length
  #    exon_bases = gene2exon_size[gene]
  #    next if exon_bases == 0
  #    frequency = matches.to_f / exon_bases
  #    pvalue = RSRuby.instance.binom_test(matches, exon_bases, global_frequency, 'greater')["p.value"]
  #    tsv[gene] = [matches, exon_bases, frequency, pvalue]
  #  end
  #
  #  tsv
  #end

  # Builds a per-gene overview table with four columns: the samples in which
  # the gene is mutated, affected, and damaged, plus a mutation significance
  # value ("p.value" from NKIWorkflow's :significantly_mutated job, or the
  # literal "> 0.1" when the gene is not in that result).
  #
  # Returns the empty table when the study has no mutations. For studies with
  # 5000 mutations or more the damaged-genes computation is skipped and the
  # "damaged" column stays empty.
  task :genotype_overview => :tsv do
    gene_overview = TSV.setup({},
                              :key_field => "Ensembl Gene ID",
                              :fields => ["Samples with gene mutated", "Samples with gene affected", "Samples with gene damaged", "Mutation significance"],
                              :type => :double
                             )
    genotyped_samples = study.samples.select_by(:has_genotype?)
    all_mutations = study.all_mutations

    if all_mutations.empty?
      gene_overview
    else
      log :affected_genes, "Computing how genes are affected by mutations"
      mutation_genes = Misc.process_to_hash(all_mutations) { |list| list.genes }
      mutation_affected_genes = Misc.process_to_hash(all_mutations) { |list| list.affected_genes }

      if all_mutations.length < 5000
        log :damaged_genes, "Computing damaged genes"
        mutation_damaged_genes = Misc.process_to_hash(all_mutations) { |list| list.damaged_genes }
      else
        # Placeholder: every mutation maps to nil, which is skipped below.
        mutation_damaged_genes = Misc.process_to_hash(all_mutations) { |list| [nil] * list.length }
      end

      log :significance, "Computing mutation significance"
      mutation_significance = NKIWorkflow.job(:significantly_mutated, study, :study => study, :threshold => 0.1).run
      log :significance, "Reordering mutation significance file"

      mutation_significance.identifiers = Organism.identifiers(study.organism)
      mutation_significance = mutation_significance.change_key "Ensembl Gene ID"

      log :samples, "Gathering affected samples"
      # Position in this list is the flag index: 0 mutated, 1 affected, 2 damaged.
      status_sources = [mutation_genes, mutation_affected_genes, mutation_damaged_genes]

      samples_gene_status = {}
      genotyped_samples.each do |sample|
        gene_status = samples_gene_status[sample] = {}
        sample_mutations = sample.mutations

        status_sources.each_with_index do |genes_by_mutation, flag|
          genes_by_mutation.values_at(*sample_mutations).each do |genes|
            # A mutation can map to nil in any of the three tables (unknown
            # mutation, no gene list, or the placeholder above); skip it
            # instead of raising.
            next if genes.nil?

            genes.each do |gene|
              (gene_status[gene] ||= [false, false, false])[flag] = true
            end
          end
        end
      end

      log :compiling, "Compiling result"
      mutation_genes.values.compact.flatten.uniq.each do |gene|
        # One sample list per flag, in the same order as the table fields.
        columns = (0..2).collect do |flag|
          flagged = samples_gene_status.select do |_sample, gene_status|
            gene_status.include?(gene) && gene_status[gene][flag]
          end
          flagged.collect { |sample, _gene_status| sample }
        end

        columns << [mutation_significance.include?(gene) ? mutation_significance[gene]["p.value"] : "> 0.1"]

        # Assign the finished row once rather than appending to a fetched value.
        gene_overview[gene] = columns
      end

      gene_overview
    end
  end
end
|
|
111
|
-
|
|
112
|
-
# Genotype access for a study: which genotype files exist and how they are
# loaded into a cohort.
module Study
  # Reports whether the study's genotypes directory exists.
  def has_genotypes?
    dir.genotypes.exists?
  end

  # Only the writer is generated: the reader is hand-written below, and
  # declaring it with attr_accessor as well triggers Ruby's "method redefined"
  # warning.
  attr_writer :watson

  # The watson setting. Falls back to metadata[:watson] while no value has
  # been assigned (i.e. while @watson is nil).
  def watson
    @watson = metadata[:watson] if @watson.nil?
    @watson
  end

  # Every entry directly under the genotypes directory.
  def genotype_files
    dir.genotypes.glob("*")
  end

  # One GenomicMutation list per genotype file, named after the file's
  # basename and built from the file's lines in sorted order. The resulting
  # array is extended with Genotype::Cohort and memoized.
  def cohort
    @cohort ||= genotype_files.collect do |f|
      name = File.basename(f)
      genomic_mutations = Open.read(f).split("\n").sort
      GenomicMutation.setup(genomic_mutations, name, organism, watson)
    end.tap { |genotypes| genotypes.extend Genotype::Cohort }
  end
end
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
require 'rbbt/workflow'
|
|
2
|
-
|
|
3
|
-
Workflow.require_workflow "MutationEnrichment"
|
|
4
|
-
# Pathway enrichment tasks; both delegate the computation to the
# MutationEnrichment workflow.
module StudyWorkflow

  #{{{ SAMPLE ENRICHMENT

  # Maps each selected mutation to the samples (genotype jobnames) carrying it
  # and runs MutationEnrichment's :sample_pathway_enrichment over that table.
  # Copies :total_covered and :covered_mutations from the delegated job's info.
  input :database, :string
  input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
  input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
  input :permutations, :integer, "Number of permutations in test", 10000
  input :fdr, :boolean, "BH FDR corrections", true
  input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
  task :sample_pathway_enrichment => :tsv do |database,mutation_subset,baseline,permutations,fdr,masked_genes|

    # NOTE(review): mutation_subset is a task input and is dispatched with
    # `send`, so any method of the study can be invoked through it. Consider
    # restricting it to a known list of subsets.
    mutations = study.send(mutation_subset)

    mutation_tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Sample"], :type => :flat)

    study.cohort.each do |genotype|
      sample = genotype.jobname
      genotype.each do |mutation|
        next unless mutations.include? mutation
        mutation_tsv[mutation] ||= []
        mutation_tsv[mutation] << sample
      end
    end

    job = MutationEnrichment.job(:sample_pathway_enrichment, study,
                                 :mutations => mutation_tsv, :database => database, :baseline => baseline, :fdr => fdr,
                                 :masked_genes => masked_genes, :organism => study.organism, :permutations => permutations)

    res = job.run
    set_info :total_covered, job.info[:total_covered]
    set_info :covered_mutations, job.info[:covered_mutations]
    res
  end

  #{{{ METAGENOTYPE ENRICHMENT

  # Runs MutationEnrichment's :mutation_pathway_enrichment over the selected
  # mutation subset. Copies :total_covered and :covered_mutations from the
  # delegated job's info.
  #
  # The block takes exactly the five declared inputs. A former sixth
  # parameter named `organism` had no matching input, was never read, and
  # shadowed the `organism` helper inside the block.
  input :database, :string
  input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
  input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
  input :fdr, :boolean, "BH FDR corrections", true
  input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
  task :mutation_pathway_enrichment => :tsv do |database,mutation_subset,baseline,fdr,masked_genes|

    # NOTE(review): same `send`-on-input concern as in
    # :sample_pathway_enrichment.
    mutations = study.send(mutation_subset)

    job = MutationEnrichment.job(:mutation_pathway_enrichment, study,
                                 :mutations => mutations, :database => database, :baseline => baseline, :fdr => fdr,
                                 :masked_genes => masked_genes, :organism => study.organism)
    res = job.run
    set_info :total_covered, job.info[:total_covered]
    set_info :covered_mutations, job.info[:covered_mutations]
    res
  end
end
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
# Gene-level properties derived from the study's cohort of genotypes.
module Study
  # Unique genes reported by `genes` over the cohort's metagenotype.
  property :genes_with_overlapping_mutations => :single do
    cohort.metagenotype.genes.compact.flatten.uniq
  end

  # Unique mutated isoforms of the relevant mutations, keeping those whose
  # consequence is not "SYNONYMOUS". Empty array when there are none.
  property :altered_isoforms => :single do
    relevant = cohort.metagenotype.subset(relevant_mutations)
    isoforms = relevant.mutated_isoforms.compact.flatten.uniq

    if isoforms.empty?
      []
    else
      isoforms.select_by(:consequence) { |consequence| consequence != "SYNONYMOUS" }
    end
  end

  # Unique genes of the transcripts of the altered isoforms.
  property :genes_with_altered_isoform_sequence => :single do
    isoforms = self.altered_isoforms
    isoforms.empty? ? [] : isoforms.transcript.compact.gene.uniq
  end

  # Altered isoforms selected by `damaged?`; extra arguments are forwarded to
  # the selection.
  property :damaged_isoforms => :single do |*args|
    isoforms = self.altered_isoforms
    isoforms.empty? ? [] : isoforms.select_by(:damaged?, *args)
  end

  # Unique genes of the transcripts of the damaged isoforms.
  property :genes_with_damaged_isoforms => :single do |*args|
    isoforms = self.damaged_isoforms(*args)
    isoforms.empty? ? [] : isoforms.transcript.gene.uniq
  end

  # Unique genes of the transcripts with affected splicing among the relevant
  # mutations.
  property :genes_with_affected_splicing_sites => :single do
    transcripts = cohort.metagenotype.subset(relevant_mutations).transcripts_with_affected_splicing
    transcripts.compact.flatten.uniq.gene.compact.uniq
  end

  # Genes with an altered isoform sequence or an affected splicing site.
  property :affected_genes => :single do
    combined = genes_with_altered_isoform_sequence + genes_with_affected_splicing_sites
    Gene.setup(combined, "Ensembl Gene ID", organism).uniq
  end

  # Genes with a damaged isoform or an affected splicing site.
  property :damaged_genes => :single do |*args|
    combined = genes_with_damaged_isoforms(*args) + genes_with_affected_splicing_sites
    Gene.setup(combined.uniq, "Ensembl Gene ID", organism)
  end

  # Hash of gene => genotype jobnames, built from the damaging mutations of
  # each genotype.
  property :samples_with_gene_damaged => :single do
    damaging = self.damaging_mutations
    samples_by_gene = {}

    cohort.each do |genotype|
      genotype.each do |mutation|
        next unless damaging.include? mutation

        hit_genes = []
        isoforms = mutation.mutated_isoforms
        unless isoforms.nil? || isoforms.empty?
          hit_genes.concat isoforms.select_by(:damaged?).transcript.gene
        end
        hit_genes.concat mutation.transcripts_with_affected_splicing.gene

        hit_genes.uniq.each do |gene|
          samples_by_gene[gene] ||= []
          samples_by_gene[gene] << genotype.jobname
        end
      end
    end

    samples_by_gene
  end

  # Hash of gene => genotype jobnames, built from the relevant mutations of
  # each genotype.
  property :samples_with_gene_affected => :single do
    relevant = self.relevant_mutations
    samples_by_gene = {}

    cohort.each do |genotype|
      genotype.each do |mutation|
        next if mutation.nil?
        next unless relevant.include? mutation

        hit_genes = []
        isoforms = mutation.mutated_isoforms
        unless isoforms.nil? || isoforms.empty?
          hit_genes.concat isoforms.select_by(:non_synonymous).transcript.gene
        end
        hit_genes.concat mutation.transcripts_with_affected_splicing.gene

        hit_genes.uniq.each do |gene|
          samples_by_gene[gene] ||= []
          samples_by_gene[gene] << genotype.jobname
        end
      end
    end

    samples_by_gene
  end

  # Gene x sample table of "TRUE"/"FALSE" strings: "TRUE" where the gene is
  # among the sample's affected genes. Columns are the sorted, unique
  # genotyped samples.
  property :gene_sample_matrix => :single do
    genotyped_samples = samples.select { |sample| sample.has_genotype? }.sort.uniq

    matrix = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list, :fields => genotyped_samples)

    sample_count = genotyped_samples.length
    genotyped_samples.each_with_index do |sample, column|
      sample_genes = sample.affected_genes
      next if sample_genes.empty?

      sample_genes.clean_annotations.each do |gene|
        matrix[gene] ||= ["FALSE"] * sample_count
        matrix[gene][column] = "TRUE"
      end
    end

    matrix.fields = genotyped_samples

    matrix
  end

  # Genes affected in at least `min` samples; `min` is the first argument and
  # defaults to 2 when it is nil or missing.
  property :recurrent_genes => :single do |*args|
    threshold = args.first
    threshold = 2 if threshold.nil?

    frequent = samples_with_gene_affected.select { |_gene, sample_list| sample_list.length >= threshold }
    Gene.setup(frequent.collect { |gene, _sample_list| gene }, "Ensembl Gene ID", organism)
  end
end
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
require 'rbbt/workflow'
|
|
2
|
-
Workflow.require_workflow "Genomics"
|
|
3
|
-
require 'rbbt/entity/gene'
|
|
4
|
-
require 'rbbt/entity/genomic_mutation'
|
|
5
|
-
|
|
6
|
-
# Registers the genotype-derived databases of a study's knowledge base. Each
# registry entry is a Proc that receives the study (and a database argument it
# does not use) and returns a TSV.
module Study

  # "Genomic Mutation" => `genes` of each unique mutation in the metagenotype.
  self.study_registry[:mutation_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.cohort.metagenotype.uniq.each do |mutation|
      tsv[mutation] = mutation.genes
    end
    tsv
  }

  # "Genomic Mutation" => `affected_genes` of each unique mutation.
  self.study_registry[:mutation_affected_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.cohort.metagenotype.uniq.each do |mutation|
      tsv[mutation] = mutation.affected_genes
    end
    tsv
  }

  # "Genomic Mutation" => `damaged_genes` of each unique mutation.
  self.study_registry[:mutation_damaged_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.cohort.metagenotype.uniq.each do |mutation|
      tsv[mutation] = mutation.damaged_genes
    end
    tsv
  }

  # "Sample" => mutations of each genotyped sample.
  self.study_registry[:sample_mutations] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Genomic Mutation"], :type => :flat, :namespace => study.organism)

    study.samples.select_by(:has_genotype?).each do |sample|
      tsv[sample] = sample.mutations
    end

    tsv
  }

  # "Sample" => one entry per affected gene with: the gene, its mutations in
  # that sample joined by ";;", and three booleans (affected isoform, damaged
  # isoform, exon junction).
  self.study_registry[:sample_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "Genomic Mutation", "Affected isoform", "Damaged isoform", "Exon Junction"], :type => :double, :namespace => study.organism)

    sample_mutations = study.knowledge_base.get_database(:sample_mutations, :source => "Sample")
    all_mutations = study.all_mutations

    # Per-mutation and per-isoform lookups are computed once, in bulk.
    mutations2mutated_isoforms = Misc.process_to_hash(all_mutations){|mutations| mutations.any? ? mutations.mutated_isoforms : [] }
    mutations2exon_junction = Misc.process_to_hash(all_mutations){|mutations| mutations.any? ? mutations.in_exon_junction? : [] }

    all_isoforms = MutatedIsoform.setup(mutations2mutated_isoforms.values.flatten.compact.uniq, study.organism)
    mi2damaged = Misc.process_to_hash(all_isoforms){|mis| mis.any? ? mis.damaged? : [] }
    #mi2damaged = Misc.process_to_hash(MutatedIsoform.setup(mutations2mutated_isoforms.values.flatten.compact.uniq, study.organism)){|mis| [false] * mis.length }
    mi2consequence = Misc.process_to_hash(all_isoforms){|mis| mis.any? ? mis.consequence : [] }

    gene_mutations = study.knowledge_base.get_database(:mutation_genes, :source => "Ensembl Gene ID")
    gene_mutations.unnamed = true
    gene_mutations.entity_options["Genomic Mutation"] = {:watson => study.watson, :organism => study.organism}

    study.samples.select_by(:has_genotype?).each do |sample|
      mutations_in_sample = sample_mutations[sample] || []

      values = sample.affected_genes.collect do |gene|
        # Default a missing gene to [] BEFORE intersecting; calling `&` on a
        # nil lookup would raise instead of reaching the empty-row branch.
        mutations = (gene_mutations[gene] || []) & mutations_in_sample

        if mutations.any?
          GenomicMutation.setup(mutations, "Mutations in #{ sample } over #{ gene }", study.organism, study.watson)
          junction = mutations.select{|mutation| mutations2exon_junction[mutation] }.any?

          mis = Annotated.flatten mutations2mutated_isoforms.values_at(*mutations).compact

          # "Affected" means at least one isoform whose consequence is neither
          # UTR nor SYNONYMOUS.
          affected = (mis.any? && mis.select{|mi| c = mi2consequence[mi]; ! %w(UTR SYNONYMOUS).include? c}.any?)
          damaged  = (mis.any? && mis.select{|mi| mi2damaged[mi] }.any?)

          [gene, mutations * ";;", affected, damaged, junction]
        else
          [gene, "", false, false, false]
        end
      end

      tsv[sample] = Misc.zip_fields values
    end

    tsv
  }

end
|