rbbt-study 0.2.30 → 0.2.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- metadata +2 -24
- data/lib/rbbt/entity/study.rb +0 -172
- data/lib/rbbt/entity/study/cnv.rb +0 -170
- data/lib/rbbt/entity/study/cnv/genes.rb +0 -28
- data/lib/rbbt/entity/study/cnv/knowledge_base.rb +0 -39
- data/lib/rbbt/entity/study/cnv/samples.rb +0 -54
- data/lib/rbbt/entity/study/enrichment.rb +0 -418
- data/lib/rbbt/entity/study/expression.rb +0 -24
- data/lib/rbbt/entity/study/features.rb +0 -17
- data/lib/rbbt/entity/study/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes.rb +0 -134
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +0 -56
- data/lib/rbbt/entity/study/genotypes/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +0 -81
- data/lib/rbbt/entity/study/genotypes/mutations.rb +0 -34
- data/lib/rbbt/entity/study/genotypes/samples.rb +0 -28
- data/lib/rbbt/entity/study/knowledge_base.rb +0 -35
- data/lib/rbbt/entity/study/methylation.rb +0 -90
- data/lib/rbbt/entity/study/methylation/samples.rb +0 -31
- data/lib/rbbt/entity/study/mutations.rb +0 -259
- data/lib/rbbt/entity/study/plots.rb +0 -140
- data/lib/rbbt/entity/study/samples.rb +0 -78
- data/lib/rbbt/entity/study/snp.rb +0 -87
@@ -1,24 +0,0 @@
|
|
1
|
-
# Workflow tasks built on the study's "gene_expression" matrix.
module StudyWorkflow
  # Organism stored under :organism in the study metadata.
  helper(:organism) { study.metadata[:organism] }

  # Requests the "gene_expression" matrix in "Ensembl Gene ID" format for the
  # study organism and calls matrix_file with the job path. The block itself
  # yields nil; presumably the task result is whatever matrix_file leaves at
  # `path` -- confirm against the workflow engine.
  task :matrix => :tsv do
    study.matrix("gene_expression", "Ensembl Gene ID", organism).matrix_file(path)
    nil
  end

  # Same matrix, handed to its barcode method together with the job path.
  # The first task argument, when given, is the barcode factor; it defaults
  # to 2.
  task :expression_barcode => :tsv do |*args|
    barcode_factor = args.first || 2
    expression = study.matrix("gene_expression", "Ensembl Gene ID", organism)
    expression.barcode(path, barcode_factor)
    nil
  end
end
|
19
|
-
|
20
|
-
# Expression-related helpers mixed into studies.
module Study
  # Whether the "gene_expression" entry under the study's matrices directory
  # reports that it exists.
  def has_expression?
    expression_entry = dir.matrices["gene_expression"]
    expression_entry.exists?
  end
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
# Builds a table keyed by "Sample" with one 0/1 column per requested gene:
# 1 when the gene appears among the sample's mutated genes, 0 otherwise.
dep :mutated_genes_per_sample
input :list, :array, "Gene list in Ensembl Gene ID"
task :gene_features => :tsv do |list|
  genes_by_sample = step(:mutated_genes_per_sample).load

  sample_names = study.cohort.fields

  # Column names are the gene names suffixed with "_mut".
  # NOTE(review): `list.name` assumes the input list responds to `name`
  # (e.g. it has been set up as a gene entity list) -- confirm.
  column_names = list.name.collect{|gene_name| gene_name + "_mut"}
  feature_table = TSV.setup({}, :key_field => "Sample", :fields => column_names)

  sample_names.each do |sample|
    # Samples missing from the dependency result count as having no mutated genes.
    mutated = genes_by_sample[sample] || []
    feature_table[sample] = list.collect do |gene|
      if mutated.include?(gene) then 1 else 0 end
    end
  end

  feature_table
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
# NON UNIQ
# Genes of every genotype in the cohort, flattened into a single list without
# removing repetitions, and set up as "Ensembl Gene ID" genes.
returns "Ensembl Gene ID"
task :affected_genes => :annotations do
  genes_per_genotype = study.cohort.collect do |genotype|
    genotype.genes.compact
  end

  Gene.setup(genes_per_genotype.flatten, "Ensembl Gene ID", organism)
end
|
6
|
-
|
7
|
-
# NON UNIQ
# Genes reached by the relevant mutations, either through transcripts with
# affected splicing (exon-junction mutations only) or through the proteins of
# their mutated isoforms. Genes are unique per mutation, but the same gene is
# repeated when several mutations reach it.
dep :relevant_mutations
returns "Ensembl Gene ID"
task :relevant_genes => :annotations do
  mutations = step(:relevant_mutations).load

  genes_per_mutation = mutations.collect do |mutation|
    splicing_genes =
      if mutation.in_exon_junction?
        mutation.transcripts_with_affected_splicing.gene
      else
        []
      end

    isoforms = mutation.mutated_isoforms
    protein_genes = isoforms.nil? ? [] : isoforms.protein.gene.compact.uniq

    (splicing_genes + protein_genes).uniq
  end

  Gene.setup(genes_per_mutation.compact.flatten, "Ensembl Gene ID", organism)
end
|
19
|
-
|
20
|
-
# NON UNIQ
# Genes reached by relevant mutations through a damaged mutated isoform and,
# when add_exon_junction is set, through transcripts with affected splicing.
# Genes are unique per mutation but repeated across mutations.
dep :relevant_mutations
returns "Ensembl Gene ID"
input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
input :add_exon_junction, :boolean, "Add exon junction mutations", true
task :damaged_genes => :annotations do |methods, add_exon_junction|
  mutations = step(:relevant_mutations).load

  # Damage is evaluated for the whole isoform list in one call; the resulting
  # hash is then consulted per isoform inside the loop below.
  isoforms = mutations.mutated_isoforms.compact.flatten
  damaged_by_isoform = Misc.process_to_hash(isoforms){|isoform_list| isoform_list.damaged?(methods) }

  genes_per_mutation = mutations.collect do |mutation|
    hit_genes = []

    if add_exon_junction && mutation.in_exon_junction? && mutation.type != 'none'
      hit_genes.concat mutation.transcripts_with_affected_splicing.gene
    end

    mutation_isoforms = mutation.mutated_isoforms
    unless mutation_isoforms.nil?
      hit_genes.concat mutation_isoforms.select{|mi| damaged_by_isoform[mi]}.protein.gene.compact.uniq
    end

    hit_genes.uniq
  end

  Gene.setup(genes_per_mutation.compact.flatten, "Ensembl Gene ID", organism)
end
|
43
|
-
|
44
|
-
# Counts how many times each gene occurs in the (non-unique) relevant_genes
# list; yields an empty hash when that list has no entries.
dep :relevant_genes
task :gene_mutation_count => :yaml do
  genes = step(:relevant_genes).load
  genes.any? ? Misc.counts(genes.clean_annotations) : {}
end
|
53
|
-
|
54
|
-
# NON UNIQ
# Genes whose mutation count is greater than 1 and also strictly greater than
# `percentage` percent of the cohort size.
dep :gene_mutation_count
input :percentage, :float, "Minimum percentage of samples with the mutation", 0
returns "Ensembl Gene ID"
task :recurrent_genes => :annotations do |percentage|
  counts = step(:gene_mutation_count).load
  minimum = (study.cohort.length.to_f * percentage.to_f) / 100.0

  recurrent = []
  counts.each do |gene, count|
    recurrent << gene if count > 1 && count > minimum
  end

  Gene.setup(recurrent, "Ensembl Gene ID", organism)
end
|
70
|
-
|
71
|
-
# Union (without repetitions) of the damaged and the recurrent genes.
dep :damaged_genes
dep :recurrent_genes
returns "Ensembl Gene ID"
task :suspect_genes => :annotations do
  damaged = step(:damaged_genes).load
  recurrent = step(:recurrent_genes).load

  combined = (damaged + recurrent).flatten.uniq
  Gene.setup(combined, "Ensembl Gene ID", organism)
end
|
80
|
-
|
81
|
-
# Relevant mutations that have genes and share at least one of them with the
# recurrent gene list.
dep :relevant_mutations
dep :recurrent_genes
task :mutations_over_recurrent_genes => :annotations do
  mutations = step(:relevant_mutations).load
  target_genes = step(:recurrent_genes).load

  mutations.select do |mutation|
    # Mutations without a gene list never match.
    next false unless mutation.genes
    (mutation.genes & target_genes).any?
  end
end
|
89
|
-
|
90
|
-
# Relevant mutations that have genes and share at least one of them with the
# suspect gene list.
dep :relevant_mutations
dep :suspect_genes
task :mutations_over_suspect_genes => :annotations do
  mutations = step(:relevant_mutations).load
  target_genes = step(:suspect_genes).load

  mutations.select do |mutation|
    # Mutations without a gene list never match.
    next false unless mutation.genes
    (mutation.genes & target_genes).any?
  end
end
|
98
|
-
|
99
|
-
require 'rbbt/mutation/oncodriveFM'
|
100
|
-
# Runs OncodriveFM.process_cohort on the study cohort and tags the resulting
# table with the study organism as its namespace.
task :oncodriveFM => :tsv do
  OncodriveFM.process_cohort(study.cohort).tap do |result|
    result.namespace = organism
  end
end
|
@@ -1,134 +0,0 @@
|
|
1
|
-
require 'rbbt/entity/genotype'
|
2
|
-
|
3
|
-
require 'rbbt/entity/study/genotypes/samples'
|
4
|
-
require 'rbbt/entity/study/genotypes/mutations'
|
5
|
-
require 'rbbt/entity/study/genotypes/genes'
|
6
|
-
require 'rbbt/entity/study/genotypes/enrichment'
|
7
|
-
require 'rbbt/entity/study/genotypes/knowledge_base'
|
8
|
-
|
9
|
-
# Genotype-related workflow tasks for a study.
module StudyWorkflow
  # Organism stored under :organism in the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  #task :binomial_significance => :tsv do
  #  tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :fields => ["Matches", "Bases", "Frequency", "p.value"], :namespace => organism)
  #  matches = study.knowledge_base.get_index(:mutation_genes).keys
  #  genes = matches.collect{|m| m.partition("~").last}.uniq
  #  all_mutations = matches.collect{|m| m.partition("~").first}.uniq
  #  total_bases = Gene.gene_list_exon_bases(genes)
  #  global_frequency = all_mutations.length.to_f / total_bases
  #  gene2exon_size = Misc.process_to_hash(genes){|genes| genes.collect{|gene| Gene.gene_list_exon_bases([gene]) }}
  #  genes.each do |gene|
  #    mutations = study.knowledge_base.parents(:mutation_genes, gene).target
  #    mutations = study.knowledge_base.subset(:sample_mutations, "Genomic Mutation" => mutations, "Sample" => :all).source
  #    next if mutations.empty?
  #    matches = mutations.length
  #    exon_bases = gene2exon_size[gene]
  #    next if exon_bases == 0
  #    frequency = matches.to_f / exon_bases
  #    pvalue = RSRuby.instance.binom_test(matches, exon_bases, global_frequency, 'greater')["p.value"]
  #    tsv[gene] = [matches, exon_bases, frequency, pvalue]
  #  end
  #  tsv
  #end

  # Per-gene overview of the genotyped samples. Each row, keyed by Ensembl
  # Gene ID, lists the samples in which the gene is mutated, affected and
  # damaged, plus the p.value reported by NKIWorkflow's :significantly_mutated
  # job ("> 0.1" when the gene is not in that result).
  #
  # Returns the empty table when the study has no mutations. Damaged genes
  # are only computed when there are fewer than 5000 mutations; above that
  # the "damaged" column stays empty.
  task :genotype_overview => :tsv do
    gene_overview = TSV.setup({},
      :key_field => "Ensembl Gene ID",
      :fields => ["Samples with gene mutated", "Samples with gene affected", "Samples with gene damaged", "Mutation significance"],
      :type => :double
    )
    genotyped_samples = study.samples.select_by(:has_genotype?)
    all_mutations = study.all_mutations
    if all_mutations.empty?
      gene_overview
    else

      log :affected_genes, "Computing how genes are affected by mutations"
      mutation_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.genes}
      mutation_affected_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.affected_genes}
      if all_mutations.length < 5000
        log :damaged_genes, "Computing damaged genes"
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|mutations| mutations.damaged_genes}
      else
        # Placeholder: every mutation maps to nil, which the loop below skips.
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|mutations| [nil] * mutations.length}
      end
      log :significance, "Computing mutation significance"
      mutation_significance = NKIWorkflow.job(:significantly_mutated, study, :study => study, :threshold => 0.1).run
      log :significance, "Reordering mutation significance file"

      mutation_significance.identifiers = Organism.identifiers(study.organism)
      mutation_significance = mutation_significance.change_key "Ensembl Gene ID"

      log :samples, "Gathering affected samples"
      # Position in this list == position of the flag in each gene's status
      # triple: [mutated, affected, damaged].
      status_sources = [mutation_genes, mutation_affected_genes, mutation_damaged_genes]
      samples_gene_status = {}
      genotyped_samples.each do |sample|
        gene_status = samples_gene_status[sample] = {}
        sample_mutations = sample.mutations

        status_sources.each_with_index do |genes_by_mutation, flag|
          genes_by_mutation.values_at(*sample_mutations).each do |genes|
            # A mutation may have no gene list (nil) in any of the three
            # sources, not only in the damaged one.
            next if genes.nil?
            genes.each do |gene|
              gene_status[gene] ||= [false, false, false]
              gene_status[gene][flag] = true
            end
          end
        end
      end

      log :compiling, "Compiling result"
      mutation_genes.values.compact.flatten.uniq.each do |gene|
        # One sample list per status flag, in column order.
        sample_lists = (0..2).collect do |flag|
          samples_gene_status.select{|sample, status| status.include? gene and status[gene][flag]}.collect{|sample, status| sample}
        end
        significance = mutation_significance.include?(gene) ? mutation_significance[gene]["p.value"] : "> 0.1"

        gene_overview[gene] = sample_lists + [[significance]]
      end

      gene_overview
    end
  end
end
|
111
|
-
|
112
|
-
# Genotype access for studies: genotype files, the watson flag and the cohort.
module Study
  # Whether the study directory's genotypes entry reports that it exists.
  def has_genotypes?
    dir.genotypes.exists?
  end

  # Only the writer is generated here; the reader is written by hand below so
  # that it can fall back to the study metadata (generating it as well would
  # just be overwritten and trigger a "method redefined" warning).
  attr_writer :watson

  # The watson flag: the value assigned through #watson= or, while that is
  # still nil, the :watson entry of the study metadata (which is then cached).
  def watson
    @watson = metadata[:watson] if @watson.nil?
    @watson
  end

  # Every entry found directly under the genotypes directory.
  def genotype_files
    dir.genotypes.glob("*")
  end

  # One GenomicMutation list per genotype file, named after the file's
  # basename and holding the file's lines in sorted order. The resulting
  # array is extended with Genotype::Cohort and memoized.
  def cohort
    @cohort ||= genotype_files.collect do |f|
      name = File.basename(f)
      genomic_mutations = Open.read(f).split("\n").sort
      GenomicMutation.setup(genomic_mutations, name, organism, watson)
    end.tap{|genotypes| genotypes.extend Genotype::Cohort}
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'rbbt/workflow'
|
2
|
-
|
3
|
-
Workflow.require_workflow "MutationEnrichment"
|
4
|
-
# Pathway enrichment tasks that delegate to the MutationEnrichment workflow.
module StudyWorkflow

  #{{{ SAMPLE ENRICHMENT
  # Builds a "Genomic Mutation" => samples table restricted to the chosen
  # mutation subset and runs MutationEnrichment's :sample_pathway_enrichment
  # on it. The delegated job's :total_covered and :covered_mutations info
  # entries are copied onto this job.
  input :database, :string
  input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
  input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
  input :permutations, :integer, "Number of permutations in test", 10000
  input :fdr, :boolean, "BH FDR corrections", true
  input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
  task :sample_pathway_enrichment => :tsv do |database,mutation_subset,baseline,permutations,fdr,masked_genes|

    # NOTE(review): mutation_subset is a task input dispatched through `send`,
    # so it can name any method of the study; consider restricting it to a
    # known list of subsets.
    mutations = study.send(mutation_subset)

    mutation_tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Sample"], :type => :flat)

    study.cohort.each do |genotype|
      sample = genotype.jobname
      genotype.each do |mutation|
        next unless mutations.include? mutation
        mutation_tsv[mutation] ||= []
        mutation_tsv[mutation] << sample
      end
    end

    job = MutationEnrichment.job(:sample_pathway_enrichment, study,
                                 :mutations => mutation_tsv, :database => database, :baseline => baseline, :fdr => fdr,
                                 :masked_genes => masked_genes, :organism => study.organism, :permutations => permutations)

    res = job.run
    set_info :total_covered, job.info[:total_covered]
    set_info :covered_mutations, job.info[:covered_mutations]
    res
  end

  #{{{ METAGENOTYPE ENRICHMENT
  # Runs MutationEnrichment's :mutation_pathway_enrichment on the chosen
  # mutation subset and copies the delegated job's :total_covered and
  # :covered_mutations info entries onto this job.
  input :database, :string
  input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
  input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
  input :fdr, :boolean, "BH FDR corrections", true
  input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
  # Block parameters mirror the five inputs above, one to one. The organism
  # is always read from the study itself.
  task :mutation_pathway_enrichment => :tsv do |database,mutation_subset,baseline,fdr,masked_genes|

    # NOTE(review): same unrestricted `send` on a task input as in
    # :sample_pathway_enrichment.
    mutations = study.send(mutation_subset)

    job = MutationEnrichment.job(:mutation_pathway_enrichment, study,
                                 :mutations => mutations, :database => database, :baseline => baseline, :fdr => fdr,
                                 :masked_genes => masked_genes, :organism => study.organism)
    res = job.run
    set_info :total_covered, job.info[:total_covered]
    set_info :covered_mutations, job.info[:covered_mutations]
    res
  end
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
# Gene-level properties derived from the mutations of a study cohort.
module Study
  # Unique genes overlapped by the mutations of the cohort metagenotype.
  property :genes_with_overlapping_mutations => :single do
    mutations = cohort.metagenotype
    mutations.genes.compact.flatten.uniq
  end

  # Mutated isoforms of the relevant mutations whose consequence is not
  # "SYNONYMOUS". Empty array when there are no mutated isoforms.
  property :altered_isoforms => :single do
    mutated_isoforms = cohort.metagenotype.subset(relevant_mutations).mutated_isoforms.compact.flatten.uniq
    return [] if mutated_isoforms.empty?
    mutated_isoforms.select_by(:consequence){|c| c != "SYNONYMOUS"}
  end

  # Unique genes of the transcripts behind the altered isoforms.
  property :genes_with_altered_isoform_sequence => :single do
    altered_isoforms = self.altered_isoforms
    return [] if altered_isoforms.empty?
    altered_isoforms.transcript.compact.gene.uniq
  end

  # Altered isoforms that answer true to damaged?; any arguments are
  # forwarded to damaged?.
  property :damaged_isoforms => :single do |*args|
    altered_isoforms = self.altered_isoforms
    return [] if altered_isoforms.empty?
    altered_isoforms.select_by(:damaged?, *args)
  end

  # Unique genes of the transcripts behind the damaged isoforms.
  property :genes_with_damaged_isoforms => :single do |*args|
    # Explicit receiver: the local variable has the same name as the property.
    damaged_isoforms = self.damaged_isoforms(*args)
    return [] if damaged_isoforms.empty?
    damaged_isoforms.transcript.gene.uniq
  end

  # Unique genes of the transcripts whose splicing is affected by a relevant
  # mutation.
  property :genes_with_affected_splicing_sites => :single do
    cohort.metagenotype.subset(relevant_mutations).transcripts_with_affected_splicing.compact.flatten.uniq.gene.compact.uniq
  end

  # Genes with an altered isoform sequence or an affected splicing site.
  property :affected_genes => :single do
    Gene.setup(genes_with_altered_isoform_sequence + genes_with_affected_splicing_sites, "Ensembl Gene ID", organism).uniq
  end

  # Genes with a damaged isoform or an affected splicing site.
  property :damaged_genes => :single do |*args|
    Gene.setup((genes_with_damaged_isoforms(*args) + genes_with_affected_splicing_sites).uniq, "Ensembl Gene ID", organism)
  end

  # Hash of gene => sample names (genotype jobnames) in which a damaging
  # mutation damages an isoform of the gene or affects its splicing.
  # Each sample is listed at most once per gene.
  property :samples_with_gene_damaged => :single do
    damaging_mutations = self.damaging_mutations

    samples_with_gene_damaged = {}
    cohort.each do |genotype|
      sample = genotype.jobname
      genotype.each do |mutation|
        next unless damaging_mutations.include? mutation
        genes = []
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:damaged?).transcript.gene unless mis.nil? or mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
        genes.uniq.each do |gene|
          gene_samples = (samples_with_gene_damaged[gene] ||= [])
          # Several mutations of one sample may hit the same gene; record the
          # sample only once so list lengths count samples, not mutations.
          gene_samples << sample unless gene_samples.include? sample
        end
      end
    end
    samples_with_gene_damaged
  end

  # Hash of gene => sample names (genotype jobnames) in which a relevant
  # mutation has a non-synonymous isoform of the gene or affects its splicing.
  # Each sample is listed at most once per gene.
  property :samples_with_gene_affected => :single do
    relevant_mutations = self.relevant_mutations

    samples_with_gene_affected = {}
    cohort.each do |genotype|
      sample = genotype.jobname
      genotype.each do |mutation|
        next if mutation.nil?
        next unless relevant_mutations.include? mutation
        genes = []
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:non_synonymous).transcript.gene unless mis.nil? or mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
        genes.uniq.each do |gene|
          gene_samples = (samples_with_gene_affected[gene] ||= [])
          # Several mutations of one sample may hit the same gene; record the
          # sample only once so list lengths count samples, not mutations.
          gene_samples << sample unless gene_samples.include? sample
        end
      end
    end
    samples_with_gene_affected
  end

  # Table keyed by Ensembl Gene ID with one "TRUE"/"FALSE" column per
  # genotyped sample, "TRUE" where the gene is among the sample's affected
  # genes. Genes affected in no sample get no row.
  property :gene_sample_matrix => :single do
    genotyped_samples = samples.select{|s| s.has_genotype?}.sort.uniq

    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list, :fields => genotyped_samples)

    num_samples = genotyped_samples.length
    genotyped_samples.each_with_index do |sample,i|
      affected_genes = sample.affected_genes
      next if affected_genes.empty?
      affected_genes.clean_annotations.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][i] = "TRUE"
      end
    end

    tsv.fields = genotyped_samples

    tsv
  end

  # Genes affected in at least `min` samples; `min` is the first argument and
  # defaults to 2.
  property :recurrent_genes => :single do |*args|
    min = args.first
    min = 2 if min.nil?

    Gene.setup(samples_with_gene_affected.select{|gene, samples| samples.length >= min }.collect{|gene,samples| gene}, "Ensembl Gene ID", organism)
  end
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
require 'rbbt/workflow'
|
2
|
-
Workflow.require_workflow "Genomics"
|
3
|
-
require 'rbbt/entity/gene'
|
4
|
-
require 'rbbt/entity/genomic_mutation'
|
5
|
-
|
6
|
-
# Registers the genotype-derived databases of a study in study_registry.
# Every entry is a Proc taking (study, database) and returning a TSV.
module Study

  # Three databases share one shape: a flat "Genomic Mutation" =>
  # "Ensembl Gene ID" table filled from one property of each unique mutation
  # in the cohort metagenotype.
  {
    :mutation_genes          => :genes,
    :mutation_affected_genes => :affected_genes,
    :mutation_damaged_genes  => :damaged_genes,
  }.each do |database_name, gene_property|
    self.study_registry[database_name] = Proc.new{|study,database|
      tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
      study.cohort.metagenotype.uniq.each do |mutation|
        tsv[mutation] = mutation.public_send(gene_property)
      end
      tsv
    }
  end

  # Flat "Sample" => "Genomic Mutation" table for the genotyped samples.
  self.study_registry[:sample_mutations] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Genomic Mutation"], :type => :flat, :namespace => study.organism)

    study.samples.select_by(:has_genotype?).each do |sample|
      tsv[sample] = sample.mutations
    end

    tsv
  }

  # Double table keyed by "Sample": for each affected gene of the sample, the
  # sample's mutations over that gene (joined with ";;") and three flags --
  # affected isoform, damaged isoform, exon junction.
  self.study_registry[:sample_genes] = Proc.new{|study,database|
    tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "Genomic Mutation", "Affected isoform", "Damaged isoform", "Exon Junction"], :type => :double, :namespace => study.organism)

    sample_mutations = study.knowledge_base.get_database(:sample_mutations, :source => "Sample")
    all_mutations = study.all_mutations
    mutations2mutated_isoforms = Misc.process_to_hash(all_mutations){|mutations| mutations.any? ? mutations.mutated_isoforms : [] }
    mutations2exon_junction = Misc.process_to_hash(all_mutations){|mutations| mutations.any? ? mutations.in_exon_junction? : [] }

    # The same isoform list feeds both lookups below, so build it once.
    all_isoforms = MutatedIsoform.setup(mutations2mutated_isoforms.values.flatten.compact.uniq, study.organism)
    mi2damaged = Misc.process_to_hash(all_isoforms){|mis| mis.any? ? mis.damaged? : [] }
    #mi2damaged = Misc.process_to_hash(MutatedIsoform.setup(mutations2mutated_isoforms.values.flatten.compact.uniq, study.organism)){|mis| [false] * mis.length }
    mi2consequence = Misc.process_to_hash(all_isoforms){|mis| mis.any? ? mis.consequence : [] }

    gene_mutations = study.knowledge_base.get_database(:mutation_genes, :source => "Ensembl Gene ID")
    gene_mutations.unnamed = true
    gene_mutations.entity_options["Genomic Mutation"] = {:watson => study.watson, :organism => study.organism}
    study.samples.select_by(:has_genotype?).each do |sample|
      values = sample.affected_genes.collect do |gene|
        # Either lookup may miss; treat a miss as "no mutations" instead of
        # calling & on nil.
        mutations = (gene_mutations[gene] || []) & (sample_mutations[sample] || [])

        if mutations.any?
          GenomicMutation.setup(mutations, "Mutations in #{ sample } over #{ gene }", study.organism, study.watson)
          junction = mutations.select{|mutation| mutations2exon_junction[mutation] }.any?

          mis = Annotated.flatten mutations2mutated_isoforms.values_at(*mutations).compact

          # Affected: some isoform whose consequence is neither UTR nor SYNONYMOUS.
          affected = (mis.any? and mis.select{|mi| c = mi2consequence[mi]; ! %w(UTR SYNONYMOUS).include? c}.any?)
          damaged  = (mis.any? and mis.select{|mi| mi2damaged[mi] }.any?)

          [gene, mutations * ";;", affected, damaged, junction]
        else
          [gene, "", false, false, false]
        end
      end

      tsv[sample] = Misc.zip_fields values
    end

    tsv
  }

end
|