rbbt-study 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- metadata +2 -24
- data/lib/rbbt/entity/study.rb +0 -172
- data/lib/rbbt/entity/study/cnv.rb +0 -170
- data/lib/rbbt/entity/study/cnv/genes.rb +0 -28
- data/lib/rbbt/entity/study/cnv/knowledge_base.rb +0 -39
- data/lib/rbbt/entity/study/cnv/samples.rb +0 -54
- data/lib/rbbt/entity/study/enrichment.rb +0 -418
- data/lib/rbbt/entity/study/expression.rb +0 -24
- data/lib/rbbt/entity/study/features.rb +0 -17
- data/lib/rbbt/entity/study/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes.rb +0 -134
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +0 -56
- data/lib/rbbt/entity/study/genotypes/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +0 -81
- data/lib/rbbt/entity/study/genotypes/mutations.rb +0 -34
- data/lib/rbbt/entity/study/genotypes/samples.rb +0 -28
- data/lib/rbbt/entity/study/knowledge_base.rb +0 -35
- data/lib/rbbt/entity/study/methylation.rb +0 -90
- data/lib/rbbt/entity/study/methylation/samples.rb +0 -31
- data/lib/rbbt/entity/study/mutations.rb +0 -259
- data/lib/rbbt/entity/study/plots.rb +0 -140
- data/lib/rbbt/entity/study/samples.rb +0 -78
- data/lib/rbbt/entity/study/snp.rb +0 -87
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
# Copy-number-variation (CNV) properties for study samples.
#
# `property` and `persist` are DSL calls provided elsewhere in the project;
# the `:array2single` / `:single` markers are passed through to that DSL
# unchanged. Every property here reads the sample's `study`.
module Sample
  # CNV data for the samples: the study's CNV cohort when the study reports
  # having CNV data, otherwise an empty Hash.
  property :cnvs => :array2single do
    study.has_cnv? ? study.cnv_cohort : {}
  end

  # One boolean per element of self: true when `cnvs` holds a non-nil entry
  # for it. When the study has no CNV data every element is false.
  property :has_cnv? => :array2single do
    if study.has_cnv?
      # The block parameter is named differently from the `cnvs` property so
      # it does not shadow it.
      cnvs.values_at(*self).collect { |sample_cnvs| !sample_cnvs.nil? }
    else
      [false] * self.length
    end
  end

  # Hash mapping each gene to "Lost" or "Gained".
  #
  # Each entry of `cnvs.variation` is paired with the entry of `cnvs.genes`
  # at the same position. Variations other than "loss"/"gain" are ignored,
  # and a later variation overwrites an earlier status for the same gene.
  property :gene_CN => :single do
    status_by_gene = {}
    cnvs.variation.zip(cnvs.genes).each do |variation, genes|
      next if genes.empty?

      # Purged before use, exactly as the original did; presumably this
      # strips entity annotations so genes act as plain keys -- TODO confirm.
      genes = Annotated.purge genes
      case variation
      when "loss"
        genes.each { |gene| status_by_gene[gene] = "Lost" }
      when "gain"
        genes.each { |gene| status_by_gene[gene] = "Gained" }
      end
    end
    status_by_gene
  end
  persist :gene_CN

  # CNVs selected by `gain?`; nil when `cnvs` is nil, [] when it is empty.
  property :gained_cnvs => :single do
    return nil if cnvs.nil?
    return [] if cnvs.empty?

    cnvs.select_by(:gain?)
  end

  # CNVs selected by `loss?`; nil when `cnvs` is nil, [] when it is empty.
  property :lost_cnvs => :single do
    return nil if cnvs.nil?
    return [] if cnvs.empty?

    cnvs.select_by(:loss?)
  end

  # Genes whose `gene_CN` status is "Gained", set up as "Ensembl Gene ID"
  # entities for the study's organism.
  property :gained_genes => :single do
    gained = gene_CN.select { |_gene, status| status == "Gained" }.collect { |gene, _status| gene }
    Gene.setup(gained, "Ensembl Gene ID", self.study.organism)
  end

  # Genes whose `gene_CN` status is "Lost", set up as "Ensembl Gene ID"
  # entities for the study's organism.
  property :lost_genes => :single do
    lost = gene_CN.select { |_gene, status| status == "Lost" }.collect { |gene, _status| gene }
    Gene.setup(lost, "Ensembl Gene ID", self.study.organism)
  end

  # Unique union of the lost and gained genes, or nil when either list is nil.
  # The organism is taken from `lost_genes`; the previous unused lookup of
  # `study.metadata[:organism]` has been removed.
  property :cnv_genes => :single do
    return nil if lost_genes.nil? || gained_genes.nil?

    Gene.setup((lost_genes + gained_genes).uniq, "Ensembl Gene ID", lost_genes.organism)
  end
end
|
|
53
|
-
|
|
54
|
-
|
|
@@ -1,418 +0,0 @@
|
|
|
1
|
-
#{{{ SAMPLE ENRICHMENT
|
|
2
|
-
# Task :sample_pathway_enrichment
#
# Builds a flat "Genomic Mutation" => ["Sample", ...] table from the study
# cohort, restricted to the mutations produced by the `mutation_subset`
# dependency, and hands it to the MutationEnrichment job of the same name.
# The job's :total_covered and :covered_mutations info entries are copied
# onto this task, and the job's result is returned.
#
# The dependency is whichever task `mutation_subset` names, falling back to
# :relevant_mutations when the input is not given.
dep do |jobname, inputs|
  job(inputs[:mutation_subset] || :relevant_mutations, jobname, inputs)
end
input :database, :string, "Database code"
input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
# The default used to be :bases, which is not one of the select options
# below; it now matches the default declared by :mutation_pathway_enrichment.
input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
input :permutations, :integer, "Number of permutations in test", 10000
input :fdr, :boolean, "BH FDR corrections", true
input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
input :organism, :string, "Organism code", metadata[:organism]
task :sample_pathway_enrichment => :tsv do |database,mutation_subset,baseline,permutations,fdr,masked_genes,organism|

  mutations = step(mutation_subset).load

  mutation_tsv = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Sample"], :type => :flat)

  # Record, for every selected mutation, each sample whose genotype has it.
  study.cohort.each do |genotype|
    sample = genotype.jobname
    genotype.each do |mutation|
      next unless mutations.include? mutation

      # Kept as two statements so the append goes through the table's own
      # reader rather than the right-hand side of the assignment.
      mutation_tsv[mutation] ||= []
      mutation_tsv[mutation] << sample
    end
  end

  enrichment_job = MutationEnrichment.job(:sample_pathway_enrichment, study,
                                          :mutations => mutation_tsv, :database => database, :baseline => baseline, :fdr => fdr,
                                          :masked_genes => masked_genes, :organism => organism, :permutations => permutations)

  # Run first: the info entries are read from the job after it has run.
  result = enrichment_job.run
  set_info :total_covered, enrichment_job.info[:total_covered]
  set_info :covered_mutations, enrichment_job.info[:covered_mutations]
  result
end
|
|
34
|
-
|
|
35
|
-
#{{{ METAGENOTYPE ENRICHMENT
|
|
36
|
-
# Task :mutation_pathway_enrichment
#
# Loads the mutations produced by the `mutation_subset` dependency and passes
# them, with the remaining inputs, to the MutationEnrichment job of the same
# name. The job's :total_covered and :covered_mutations info entries are
# copied onto this task, and the job's result is returned.
#
# The dependency is whichever task `mutation_subset` names, falling back to
# :relevant_mutations when the input is not given.
dep do |jobname, inputs|
  subset_task = inputs[:mutation_subset] || :relevant_mutations
  job(subset_task, jobname, inputs)
end
input :database, :string, "Database code"
input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
input :baseline, :select, "Type of baseline to use", :pathway_base_counts, :select_options => [:pathway_base_counts, :pathway_gene_counts]
input :fdr, :boolean, "BH FDR corrections", true
input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
input :organism, :string, "Organism code", metadata[:organism]
task :mutation_pathway_enrichment => :tsv do |database,mutation_subset,baseline,fdr,masked_genes,organism|
  selected_mutations = step(mutation_subset).load

  enrichment = MutationEnrichment.job(
    :mutation_pathway_enrichment, study,
    :mutations    => selected_mutations,
    :database     => database,
    :baseline     => baseline,
    :fdr          => fdr,
    :masked_genes => masked_genes,
    :organism     => organism
  )

  # Run first: the info entries are read from the job after it has run.
  outcome = enrichment.run
  set_info :total_covered, enrichment.info[:total_covered]
  set_info :covered_mutations, enrichment.info[:covered_mutations]
  outcome
end
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
################################################################
|
|
62
|
-
#{{{ OLD
|
|
63
|
-
################################################################
|
|
64
|
-
|
|
65
|
-
#{{{ BASE AND GENE COUNTS
|
|
66
|
-
|
|
67
|
-
#input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
|
|
68
|
-
#input :organism, :string, "Organism code", metadata[:organism]
|
|
69
|
-
#task :pathway_base_counts => :tsv do |masked_genes, organism|
|
|
70
|
-
# database = clean_name
|
|
71
|
-
# log :loading_genes, "Loading genes from #{ database } #{ organism }"
|
|
72
|
-
# case database
|
|
73
|
-
# when 'kegg'
|
|
74
|
-
# tsv = KEGG.gene_pathway.tsv :key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
75
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "KEGG Gene ID", organism).ensembl.compact
|
|
76
|
-
# when 'go', 'go_bp'
|
|
77
|
-
# tsv = Organism.gene_go_bp(organism).tsv :key_field => "GO ID", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
78
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "Ensembl Gene ID", organism).ensembl.compact
|
|
79
|
-
# when 'pfam'
|
|
80
|
-
# tsv = Organism.gene_pfam(organism).tsv :key_field => "Pfam Domain", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
81
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "Ensembl Gene ID", organism).ensembl.compact
|
|
82
|
-
# end
|
|
83
|
-
#
|
|
84
|
-
# tsv.namespace = organism
|
|
85
|
-
#
|
|
86
|
-
# counts = TSV.setup({}, :key_field => tsv.key_field, :fields => ["Bases"], :type => :single, :cast => :to_i, :namespace => organism)
|
|
87
|
-
#
|
|
88
|
-
# log :processing_database, "Processing database #{database}"
|
|
89
|
-
# tsv.with_monitor do
|
|
90
|
-
# tsv.through do |pathway, genes|
|
|
91
|
-
# next if genes.nil? or genes.empty?
|
|
92
|
-
# size = Gene.gene_list_exon_bases(genes.ensembl.compact.remove(masked_genes))
|
|
93
|
-
# counts[pathway] = size
|
|
94
|
-
# end
|
|
95
|
-
# end
|
|
96
|
-
#
|
|
97
|
-
# log :computing_exome_size, "Computing number of exome bases covered by pathway annotations"
|
|
98
|
-
# total_size = Gene.gene_list_exon_bases(total_genes.remove(masked_genes))
|
|
99
|
-
#
|
|
100
|
-
# set_info :total_size, total_size
|
|
101
|
-
# set_info :total_gene_list, total_genes.remove(masked_genes)
|
|
102
|
-
#
|
|
103
|
-
# counts
|
|
104
|
-
#end
|
|
105
|
-
#
|
|
106
|
-
#input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
|
|
107
|
-
#input :organism, :string, "Organism code", metadata[:organism]
|
|
108
|
-
#task :pathway_gene_counts => :tsv do |masked_genes,organism|
|
|
109
|
-
# database = clean_name
|
|
110
|
-
# case database.to_s
|
|
111
|
-
# when 'kegg'
|
|
112
|
-
# tsv = KEGG.gene_pathway.tsv :key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
113
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "KEGG Gene ID", organism).ensembl.compact
|
|
114
|
-
# when 'go', 'go_bp'
|
|
115
|
-
# tsv = Organism.gene_go_bp(organism).tsv :key_field => "GO ID", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
116
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "Ensembl Gene ID", organism).ensembl.compact
|
|
117
|
-
# when 'pfam'
|
|
118
|
-
# tsv = Organism.gene_pfam(organism).tsv :key_field => "Pfam Domain", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :merge => true
|
|
119
|
-
# total_genes = Gene.setup(tsv.values.compact.flatten.uniq, "Ensembl Gene ID", organism).ensembl.compact
|
|
120
|
-
# end
|
|
121
|
-
#
|
|
122
|
-
# counts = TSV.setup({}, :key_field => tsv.key_field, :fields => ["Genes"], :type => :single, :cast => :to_i, :namespace => organism)
|
|
123
|
-
#
|
|
124
|
-
# tsv.through do |pathway, genes|
|
|
125
|
-
# next if genes.nil? or genes.empty?
|
|
126
|
-
# genes = genes.ensembl.remove(masked_genes)
|
|
127
|
-
# num = genes.length
|
|
128
|
-
# counts[pathway] = num
|
|
129
|
-
# end
|
|
130
|
-
#
|
|
131
|
-
# set_info :total_genes, total_genes.remove(masked_genes).length
|
|
132
|
-
# set_info :total_gene_list, total_genes.remove(masked_genes)
|
|
133
|
-
#
|
|
134
|
-
# counts
|
|
135
|
-
#end
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
#
|
|
139
|
-
#
|
|
140
|
-
#dep do |jobname, inputs| job(inputs[:baseline], inputs[:database].to_s, inputs) end
|
|
141
|
-
#dep do |jobname, inputs| job(inputs[:mutation_subset] || :relevant_mutations, jobname, inputs) end
|
|
142
|
-
#dep :affected_samples_per_pathway
|
|
143
|
-
#input :database, :string
|
|
144
|
-
#input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
|
|
145
|
-
#input :baseline, :select, "Type of baseline to use", :bases, :select_options => [:pathway_base_counts, :pathway_gene_counts]
|
|
146
|
-
#input :permutations, :integer, "Number of permutations in test", 10000
|
|
147
|
-
#input :fdr, :boolean, "BH FDR corrections", true
|
|
148
|
-
#input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
|
|
149
|
-
#input :organism, :string, "Organism code", metadata[:organism]
|
|
150
|
-
#task :sample_pathway_enrichment_old => :tsv do |database,mutation_subset,baseline,permutations,fdr,masked_genes,organism|
|
|
151
|
-
# pathway_counts = step(baseline).load
|
|
152
|
-
# pathway_counts.unnamed = true
|
|
153
|
-
# total_covered = step(baseline).info[:total_size] || step(baseline).info[:total_genes]
|
|
154
|
-
# total_pathway_genes_list = step(baseline).info[:total_gene_list]
|
|
155
|
-
# mutations = step(mutation_subset).load
|
|
156
|
-
# affected_samples_per_pathway = step(:affected_samples_per_pathway).load
|
|
157
|
-
# affected_samples_per_pathway.namespace = organism
|
|
158
|
-
#
|
|
159
|
-
# affected_genes = mutations.genes.compact.flatten.uniq
|
|
160
|
-
#
|
|
161
|
-
# case database.to_s
|
|
162
|
-
# when 'kegg'
|
|
163
|
-
# database_tsv = KEGG.gene_pathway.tsv :key_field => 'KEGG Pathway ID', :fields => ["KEGG Gene ID"], :type => :flat, :persist => true, :unnamed => false, :merge => true
|
|
164
|
-
# when 'go', 'go_bp'
|
|
165
|
-
# database_tsv = Organism.gene_go_bp(organism).tsv :key_field => "GO ID", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :unnamed => false, :merge => true
|
|
166
|
-
# when 'pfam'
|
|
167
|
-
# database_tsv = Organism.gene_pfam(organism).tsv :key_field => "Pfam Domain", :fields => ["Ensembl Gene ID"], :type => :flat, :persist => true, :unnamed => false, :merge => true
|
|
168
|
-
# end
|
|
169
|
-
#
|
|
170
|
-
# covered_genes_per_samples = {}
|
|
171
|
-
# samples = []
|
|
172
|
-
# study.cohort.each do |genotype|
|
|
173
|
-
# samples << genotype.jobname
|
|
174
|
-
# covered_genes_per_samples[genotype.jobname] = genotype.subset(mutations).genes.compact.flatten.subset(total_pathway_genes_list)
|
|
175
|
-
# end
|
|
176
|
-
#
|
|
177
|
-
# sample_mutation_tokens = []
|
|
178
|
-
# samples.collect{|sample| study.cohort[sample].subset(mutations).genes.select{|l| not l.nil? and (l & total_pathway_genes_list).any? }.length.times{ sample_mutation_tokens << sample } }
|
|
179
|
-
#
|
|
180
|
-
# mutation_genes = Misc.process_to_hash(mutations){|list| list.genes}
|
|
181
|
-
# covered_mutations = mutations.select{|mutation|(mutation_genes[mutation] & total_pathway_genes_list).any? }.length
|
|
182
|
-
#
|
|
183
|
-
# pathways = pathway_counts.keys
|
|
184
|
-
#
|
|
185
|
-
# pathway_expected_counts = {}
|
|
186
|
-
# pathway_counts.with_monitor :desc => "Pathway gene counts" do
|
|
187
|
-
# pathway_counts.through do |pathway, count|
|
|
188
|
-
# next unless affected_samples_per_pathway.include?(pathway) and affected_samples_per_pathway[pathway].any?
|
|
189
|
-
# ratio = count.to_f / total_covered
|
|
190
|
-
# num_token_list = RSRuby.instance.rbinom(permutations, sample_mutation_tokens.length, ratio)
|
|
191
|
-
# pathway_expected_counts[pathway] = num_token_list.collect{|num_tokens|
|
|
192
|
-
# Misc.sample(sample_mutation_tokens, num_tokens.to_i).uniq.length
|
|
193
|
-
# }
|
|
194
|
-
# end
|
|
195
|
-
# end
|
|
196
|
-
#
|
|
197
|
-
# tsv = TSV.setup({}, :key_field => affected_samples_per_pathway.key_field, :fields => ["Sample", "Matches", "Expected", "Ratio", "Pathway total", "p-value", "Ensembl Gene ID"], :namespace => organism, :type => :double)
|
|
198
|
-
# affected_samples_per_pathway.through do |pathway, samples|
|
|
199
|
-
# next unless samples.any?
|
|
200
|
-
# next unless pathway_expected_counts.include? pathway
|
|
201
|
-
# pathway_genes = database_tsv[pathway].ensembl
|
|
202
|
-
# samples = samples.uniq.select{|sample| (covered_genes_per_samples[sample] & pathway_genes).any?}
|
|
203
|
-
# count = samples.length
|
|
204
|
-
# expected = Misc.mean(pathway_expected_counts[pathway]).floor
|
|
205
|
-
# pvalue = pathway_expected_counts[pathway].select{|exp_c| exp_c > count}.length.to_f / permutations
|
|
206
|
-
# tsv[pathway] = [samples.sort, [count], [expected], [count.to_f / expected], [pathway_counts[pathway]], [pvalue], pathway_genes.subset(affected_genes)]
|
|
207
|
-
# end
|
|
208
|
-
#
|
|
209
|
-
# FDR.adjust_hash! tsv, 5 if fdr
|
|
210
|
-
#
|
|
211
|
-
# set_info :covered_mutations, covered_mutations
|
|
212
|
-
# set_info :total_covered, total_covered
|
|
213
|
-
#
|
|
214
|
-
# tsv
|
|
215
|
-
#end
|
|
216
|
-
#
|
|
217
|
-
#
|
|
218
|
-
#
|
|
219
|
-
#
|
|
220
|
-
#dep do |jobname, inputs| job(inputs[:baseline], inputs[:database].to_s, inputs) end
|
|
221
|
-
#dep do |jobname, inputs| job(inputs[:mutation_subset] || :relevant_mutations, jobname, inputs) end
|
|
222
|
-
#input :database, :string
|
|
223
|
-
#input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
|
|
224
|
-
#input :baseline, :select, "Type of baseline to use", :bases, :select_options => [:pathway_base_counts, :pathway_gene_counts]
|
|
225
|
-
#input :fdr, :boolean, "BH FDR corrections", true
|
|
226
|
-
#input :masked_genes, :array, "Ensembl Gene ID list of genes to mask", []
|
|
227
|
-
#input :organism, :string, "Organism code", metadata[:organism]
|
|
228
|
-
#task :mutation_pathway_enrichment_old => :tsv do |database,mutation_subset,baseline,fdr,masked_genes,organism|
|
|
229
|
-
# counts = step(baseline).load
|
|
230
|
-
# total_covered = step(baseline).info[:total_size] || step(baseline).info[:total_genes]
|
|
231
|
-
# mutations = step(mutation_subset).load
|
|
232
|
-
#
|
|
233
|
-
# affected_genes = mutations.genes.compact.flatten.uniq
|
|
234
|
-
#
|
|
235
|
-
# # Get database tsv and native ids
|
|
236
|
-
#
|
|
237
|
-
# case database
|
|
238
|
-
# when 'kegg'
|
|
239
|
-
# database_tsv = KEGG.gene_pathway.tsv :key_field => 'KEGG Gene ID', :fields => ["KEGG Pathway ID"], :type => :flat, :persist => true, :unnamed => true, :merge => true
|
|
240
|
-
# affected_genes_db = affected_genes.to_kegg
|
|
241
|
-
# all_db_genes = Gene.setup(database_tsv.keys, "KEGG Gene ID", organism).ensembl.uniq
|
|
242
|
-
# when 'go', 'go_bp'
|
|
243
|
-
# database_tsv = Organism.gene_go_bp(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :persist => true, :unnamed => true, :merge => true
|
|
244
|
-
# affected_genes_db = affected_genes
|
|
245
|
-
# all_db_genes = Gene.setup(database_tsv.keys, "KEGG Gene ID", organism).uniq
|
|
246
|
-
# when 'pfam'
|
|
247
|
-
# database_tsv = Organism.gene_pfam(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["Pfam Domain"], :type => :flat, :persist => true, :unnamed => true, :merge => true
|
|
248
|
-
# affected_genes_db = affected_genes
|
|
249
|
-
# all_db_genes = Gene.setup(database_tsv.keys, "KEGG Gene ID", organism).uniq
|
|
250
|
-
# end
|
|
251
|
-
#
|
|
252
|
-
# affected_genes = affected_genes.remove(masked_genes)
|
|
253
|
-
# all_db_genes = all_db_genes.remove(masked_genes)
|
|
254
|
-
#
|
|
255
|
-
# # Annotate each pathway with the affected genes that are involved in it
|
|
256
|
-
#
|
|
257
|
-
# affected_genes_per_pathway = {}
|
|
258
|
-
# affected_genes_db.zip(affected_genes).each do |gene_db,gene|
|
|
259
|
-
# next if gene_db.nil?
|
|
260
|
-
# pathways = database_tsv[gene_db]
|
|
261
|
-
# next if pathways.nil?
|
|
262
|
-
# pathways.uniq.each do |pathway|
|
|
263
|
-
# affected_genes_per_pathway[pathway] ||= []
|
|
264
|
-
# affected_genes_per_pathway[pathway] << gene
|
|
265
|
-
# end
|
|
266
|
-
# end
|
|
267
|
-
#
|
|
268
|
-
# pvalues = TSV.setup({}, :key_field => database_tsv.fields.first, :fields => ["Matches", "Pathway total", "p-value", "Ensembl Gene ID"], :namespace => organism, :type => :double)
|
|
269
|
-
# mutation_genes = Misc.process_to_hash(mutations){|list| list.genes}
|
|
270
|
-
# covered_mutations = mutations.select{|mutation|(mutation_genes[mutation] & all_db_genes).any? }.length
|
|
271
|
-
#
|
|
272
|
-
# affected_genes_per_pathway.each do |pathway, genes|
|
|
273
|
-
# pathway_total = counts[pathway]
|
|
274
|
-
# matches = mutations.select{|mutation| (mutation_genes[mutation] & genes).any? }.length
|
|
275
|
-
# pvalue = RSRuby.instance.binom_test(matches, covered_mutations, pathway_total.to_f / total_covered.to_f, "greater")["p.value"]
|
|
276
|
-
#
|
|
277
|
-
# pvalues[pathway] = [[matches], [pathway_total], [pvalue], affected_genes.subset(genes).uniq.sort_by{|g| g.name || g}]
|
|
278
|
-
# end
|
|
279
|
-
#
|
|
280
|
-
# FDR.adjust_hash! pvalues, 2 if fdr
|
|
281
|
-
#
|
|
282
|
-
# set_info :covered_mutations, covered_mutations
|
|
283
|
-
# set_info :total_covered, total_covered
|
|
284
|
-
#
|
|
285
|
-
# pvalues
|
|
286
|
-
#end
|
|
287
|
-
#
|
|
288
|
-
#
|
|
289
|
-
#dep do |jobname, inputs| job(:pathway_base_counts, inputs[:database].to_s, inputs) end
|
|
290
|
-
#dep do |jobname, inputs| job(inputs[:mutation_subset] || :relevant_mutations, jobname, inputs) end
|
|
291
|
-
#dep :affected_genes
|
|
292
|
-
#input :database, :string
|
|
293
|
-
#input :fdr, :boolean, "BH FDR corrections", true
|
|
294
|
-
#input :mutation_subset, :select, "Mutation subset to use", :relevant_mutations
|
|
295
|
-
#
|
|
296
|
-
##{{{ RATIOS
|
|
297
|
-
#
|
|
298
|
-
#dep :affected_samples_per_pathway
|
|
299
|
-
#dep :affected_genes
|
|
300
|
-
#dep do |name, inputs|
|
|
301
|
-
# database = inputs[:database]
|
|
302
|
-
# if inputs[:type] == :genes
|
|
303
|
-
# job(:pathway_base_counts, database, inputs)
|
|
304
|
-
# else
|
|
305
|
-
# job(:pathway_gene_counts, database, inputs)
|
|
306
|
-
# end
|
|
307
|
-
#end
|
|
308
|
-
#task :pathway_sample_ratios => :tsv do
|
|
309
|
-
# num_samples = study.cohort.length
|
|
310
|
-
#
|
|
311
|
-
# affected_samples_per_pathway = step(:affected_samples_per_pathway).load
|
|
312
|
-
# affected_genes = step(:affected_genes).load
|
|
313
|
-
#
|
|
314
|
-
# ratios = TSV.setup({}, :key_field => sample_pathway_probability.key_field, :fields => ["Num Samples", "Expected", "Ratio", "Ensembl Gene ID"], :namespace => organism)
|
|
315
|
-
#
|
|
316
|
-
# pathways.through do |pathway, probability|
|
|
317
|
-
# next unless affected_samples_per_pathway.include?(pathway) and affected_samples_per_pathway[pathway].length > 1
|
|
318
|
-
# affected_samples = (affected_samples_per_pathway[pathway] || []).length
|
|
319
|
-
# ratios[pathway] = [affected_samples, probability * num_samples, affected_samples.to_f / (probability * num_samples), pathway.genes.ensembl.subset(affected_genes)]
|
|
320
|
-
# end
|
|
321
|
-
#
|
|
322
|
-
# ratios.namespace = organism
|
|
323
|
-
#
|
|
324
|
-
# ratios
|
|
325
|
-
#end
|
|
326
|
-
#
|
|
327
|
-
|
|
328
|
-
#input :database, :string
|
|
329
|
-
#dep :damaging_mutations
|
|
330
|
-
#dep :affected_genes
|
|
331
|
-
#task :pathway_mutation_ratios => :tsv do |database|
|
|
332
|
-
# damaging_mutations = step(:damaging_mutations).load
|
|
333
|
-
#
|
|
334
|
-
# affected_genes = step(:affected_genes).load
|
|
335
|
-
# affected_genes.organism = organism
|
|
336
|
-
#
|
|
337
|
-
# pathways = case database
|
|
338
|
-
# when 'go'
|
|
339
|
-
# affected_genes.go_terms
|
|
340
|
-
# when 'go_bp'
|
|
341
|
-
# affected_genes.go_bp_terms
|
|
342
|
-
# when 'go_cc'
|
|
343
|
-
# affected_genes.go_cc_terms
|
|
344
|
-
# when 'go_mf'
|
|
345
|
-
# affected_genes.go_mf_terms
|
|
346
|
-
# when 'pfam'
|
|
347
|
-
# affected_genes.pfam_domains
|
|
348
|
-
# else
|
|
349
|
-
# affected_genes.send(database + '_pathways')
|
|
350
|
-
# end.compact.flatten.uniq
|
|
351
|
-
#
|
|
352
|
-
# key_field = nil
|
|
353
|
-
# pathway_genes = Misc.process_to_hash(pathways) do |pathways|
|
|
354
|
-
# pathways.uniq.collect do |pathway|
|
|
355
|
-
# case database
|
|
356
|
-
# when 'kegg'
|
|
357
|
-
# KeggPathway.setup(pathway, organism)
|
|
358
|
-
# key_field = "KEGG Pathway ID"
|
|
359
|
-
# Gene.setup(pathway.genes, "KEGG Gene ID", organism).ensembl
|
|
360
|
-
# when 'go', 'go_bp', 'go_mf', 'go_cc'
|
|
361
|
-
# GOTerm.setup(pathway, organism)
|
|
362
|
-
# key_field = "GO ID"
|
|
363
|
-
# Gene.setup(pathway.genes, "Ensembl Gene ID", organism).ensembl
|
|
364
|
-
# when 'pfam'
|
|
365
|
-
# PfamDomain.setup(pathway, organism)
|
|
366
|
-
# key_field = "Pfam Domain"
|
|
367
|
-
# Gene.setup(pathway.genes, "Ensembl Gene ID", organism).ensembl
|
|
368
|
-
# end
|
|
369
|
-
# end
|
|
370
|
-
# end
|
|
371
|
-
#
|
|
372
|
-
# pathway_mutation_ratios = TSV.setup({}, :key_field => key_field,
|
|
373
|
-
# :fields => ["Mutations per MB", "# Mutations", "# Damaging Mut.", "# Genes", "# Bases", "Ensembl Gene ID"], :type => :double)
|
|
374
|
-
#
|
|
375
|
-
#
|
|
376
|
-
# pathway_sizes = Misc.process_to_hash(pathways) do |pathways|
|
|
377
|
-
# pathways.collect{|pathway| Gene.gene_list_exon_bases(pathway_genes[pathway])}
|
|
378
|
-
# end
|
|
379
|
-
#
|
|
380
|
-
# pathways_for_mutations = Misc.process_to_hash(study.cohort.flatten){|all_mutations| Gene.setup(study.cohort.collect{|genotype| genotype.genes }.flatten(1), "Ensembl Gene ID", organism).collect{|genes|
|
|
381
|
-
# genes.nil? ? [] : genes.collect{|gene|
|
|
382
|
-
# case database
|
|
383
|
-
# when 'go'
|
|
384
|
-
# gene.go_terms
|
|
385
|
-
# when 'go_bp'
|
|
386
|
-
# gene.go_bp_terms
|
|
387
|
-
# when 'go_cc'
|
|
388
|
-
# gene.go_cc_terms
|
|
389
|
-
# when 'go_mf'
|
|
390
|
-
# gene.go_mf_terms
|
|
391
|
-
# when 'pfam'
|
|
392
|
-
# gene.pfam_domains
|
|
393
|
-
# else
|
|
394
|
-
# affected_genes.send(database + '_pathways')
|
|
395
|
-
# end
|
|
396
|
-
# }.flatten}
|
|
397
|
-
# }
|
|
398
|
-
#
|
|
399
|
-
# mutations_in_pathway = Misc.process_to_hash(pathways) do |pathways|
|
|
400
|
-
# pathways.collect do |pathway|
|
|
401
|
-
# GenomicMutation.setup(pathways_for_mutations.select{|mut,pths| pths and pths.include? pathway}.collect{|mut, pths| mut}, "Pathway mutations #{pathway} in #{study}", organism, watson)
|
|
402
|
-
# end
|
|
403
|
-
# end
|
|
404
|
-
#
|
|
405
|
-
# pathways.each do |pathway|
|
|
406
|
-
# pathway_mutations = mutations_in_pathway[pathway]
|
|
407
|
-
# next if pathway_mutations.one?
|
|
408
|
-
# pathway_score = pathway_mutations.length.to_f / pathway_sizes[pathway]
|
|
409
|
-
# genes = pathway_mutations.genes.compact.flatten.subset(affected_genes).subset(pathway_genes[pathway])
|
|
410
|
-
#
|
|
411
|
-
# pathway_mutation_ratios[pathway] = [["%.5g" % (pathway_score * 1_000_000)], [pathway_mutations.length], [pathway_mutations.subset(damaging_mutations).length], [pathway_genes[pathway].length], [pathway_sizes[pathway]], genes]
|
|
412
|
-
# end
|
|
413
|
-
#
|
|
414
|
-
# pathway_mutation_ratios.namespace = organism
|
|
415
|
-
#
|
|
416
|
-
# pathway_mutation_ratios
|
|
417
|
-
#end
|
|
418
|
-
#
|