rbbt-study 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/lib/rbbt/entity/study/cnv/genes.rb +28 -0
- data/lib/rbbt/entity/study/cnv/samples.rb +55 -0
- data/lib/rbbt/entity/study/cnv.rb +170 -0
- data/lib/rbbt/entity/study/enrichment.rb +418 -0
- data/lib/rbbt/entity/study/expression.rb +19 -0
- data/lib/rbbt/entity/study/features.rb +17 -0
- data/lib/rbbt/entity/study/genes.rb +104 -0
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +56 -0
- data/lib/rbbt/entity/study/genotypes/genes.rb +103 -0
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +39 -0
- data/lib/rbbt/entity/study/genotypes/mutations.rb +34 -0
- data/lib/rbbt/entity/study/genotypes/samples.rb +28 -0
- data/lib/rbbt/entity/study/genotypes.rb +110 -0
- data/lib/rbbt/entity/study/knowledge_base.rb +36 -0
- data/lib/rbbt/entity/study/methylation/samples.rb +31 -0
- data/lib/rbbt/entity/study/methylation.rb +90 -0
- data/lib/rbbt/entity/study/mutations.rb +259 -0
- data/lib/rbbt/entity/study/plots.rb +142 -0
- data/lib/rbbt/entity/study/samples.rb +61 -0
- data/lib/rbbt/entity/study/snp.rb +87 -0
- data/lib/rbbt/entity/study.rb +151 -0
- metadata +69 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
# Gene-level views over the mutations found in a study's genotyped cohort.
module Study
  # Values of `genes` for the cohort metagenotype, with nils removed,
  # flattened and de-duplicated.
  property :genes_with_overlapping_mutations => :single do
    mutations = cohort.metagenotype
    mutations.genes.compact.flatten.uniq
  end

  # Mutated isoforms of the relevant mutations whose consequence is anything
  # other than "SYNONYMOUS". Returns [] when there are no mutated isoforms.
  property :altered_isoforms => :single do
    mutated_isoforms = cohort.metagenotype.subset(relevant_mutations).mutated_isoforms.compact.flatten.uniq
    return [] if mutated_isoforms.empty?
    mutated_isoforms.select_by(:consequence){|c| c != "SYNONYMOUS"}
  end

  # Unique genes of the transcripts behind the altered isoforms.
  property :genes_with_altered_isoform_sequence => :single do
    altered_isoforms = self.altered_isoforms
    return [] if altered_isoforms.empty?
    altered_isoforms.transcript.compact.gene.uniq
  end

  # Altered isoforms for which `damaged?` holds; extra arguments are forwarded
  # to `damaged?` through `select_by`.
  property :damaged_isoforms => :single do |*args|
    altered_isoforms = self.altered_isoforms
    return [] if altered_isoforms.empty?
    altered_isoforms.select_by(:damaged?, *args)
  end

  # Unique genes of the transcripts behind the damaged isoforms.
  property :genes_with_damaged_isoforms => :single do |*args|
    isoforms = damaged_isoforms(*args)
    return [] if isoforms.empty?
    # Drop nil transcripts before asking for genes, exactly as
    # genes_with_altered_isoform_sequence does for the same kind of list.
    isoforms.transcript.compact.gene.uniq
  end

  # Unique genes of the transcripts whose splicing is affected by the
  # relevant mutations.
  property :genes_with_affected_splicing_sites => :single do
    cohort.metagenotype.subset(relevant_mutations).transcripts_with_affected_splicing.compact.flatten.uniq.gene.compact.uniq
  end

  # Union of genes with altered isoform sequence and genes with affected
  # splicing sites, set up as "Ensembl Gene ID" Gene entities.
  property :affected_genes => :single do
    Gene.setup(genes_with_altered_isoform_sequence + genes_with_affected_splicing_sites, "Ensembl Gene ID", organism).uniq
  end

  # Union of genes with damaged isoforms and genes with affected splicing
  # sites, set up as "Ensembl Gene ID" Gene entities.
  property :damaged_genes => :single do |*args|
    Gene.setup((genes_with_damaged_isoforms(*args) + genes_with_affected_splicing_sites).uniq, "Ensembl Gene ID", organism)
  end

  # Hash from gene to the jobnames of the genotypes in which a damaging
  # mutation damages an isoform of that gene or affects its splicing.
  #
  # Each genotype is listed at most once per gene: genes are gathered across
  # all of the genotype's mutations before the jobname is recorded, so two
  # mutations hitting the same gene in one sample do not duplicate the sample.
  property :samples_with_gene_damaged => :single do
    damaging_mutations = self.damaging_mutations

    samples_with_gene_damaged = {}
    cohort.each do |genotype|
      genes = []
      genotype.each do |mutation|
        next unless damaging_mutations.include? mutation
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:damaged?).transcript.gene unless mis.nil? or mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
      end
      genes.uniq.each{|gene| (samples_with_gene_damaged[gene] ||= []) << genotype.jobname}
    end
    samples_with_gene_damaged
  end

  # Hash from gene to the jobnames of the genotypes in which a relevant
  # mutation produces a non-"SYNONYMOUS" isoform of that gene or affects its
  # splicing.
  #
  # Each genotype is listed at most once per gene (see
  # samples_with_gene_damaged), which keeps the list lengths usable as sample
  # counts in recurrent_genes.
  property :samples_with_gene_affected => :single do
    relevant_mutations = self.relevant_mutations

    samples_with_gene_affected = {}
    cohort.each do |genotype|
      genes = []
      genotype.each do |mutation|
        next unless relevant_mutations.include? mutation
        mis = mutation.mutated_isoforms
        genes.concat mis.select_by(:consequence){|c| c != "SYNONYMOUS"}.transcript.gene unless mis.nil? or mis.empty?
        genes.concat mutation.transcripts_with_affected_splicing.gene
      end
      genes.uniq.each{|gene| (samples_with_gene_affected[gene] ||= []) << genotype.jobname}
    end
    samples_with_gene_affected
  end

  # TSV keyed by "Ensembl Gene ID" with one column per genotyped sample
  # (sorted, unique). A cell is "TRUE" when the gene is among that sample's
  # affected_genes and "FALSE" otherwise; genes affected in no sample get no row.
  property :gene_sample_matrix => :single do
    genotyped_samples = samples.select{|s| s.has_genotype?}.sort.uniq

    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list, :fields => genotyped_samples)

    num_samples = genotyped_samples.length
    genotyped_samples.each_with_index do |sample,i|
      affected_genes = sample.affected_genes
      next if affected_genes.empty?
      affected_genes.clean_annotations.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][i] = "TRUE"
      end
    end

    tsv.fields = genotyped_samples

    tsv
  end

  # Genes listed for at least `min` samples in samples_with_gene_affected.
  # `min` is the first argument and defaults to 2 when omitted or nil.
  property :recurrent_genes => :single do |*args|
    min = args.first
    min = 2 if min.nil?

    Gene.setup(samples_with_gene_affected.select{|gene, samples| samples.length >= min }.collect{|gene,samples| gene}, "Ensembl Gene ID", organism)
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Registers the genotype-derived tables of a study in Study.study_registry.
# Every entry is a Proc taking (study, database) and returning a TSV; the
# `database` argument is not used by any of the entries below.
module Study

  # Flat table: each distinct mutation of the cohort metagenotype mapped to
  # the value of its `genes`.
  self.study_registry[:mutation_genes] = Proc.new do |study, database|
    table = TSV.setup({}, :key_field => "Genomic Mutation", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.cohort.metagenotype.uniq.each { |mutation| table[mutation] = mutation.genes }
    table
  end

  # Flat table: each genotyped sample mapped to its mutations.
  self.study_registry[:sample_mutations] = Proc.new do |study, database|
    table = TSV.setup({}, :key_field => "Sample", :fields => ["Genomic Mutation"], :type => :flat, :namespace => study.organism)
    study.samples.select_by(:has_genotype?).each { |sample| table[sample] = sample.mutations }
    table
  end

  # Flat table: each genotyped sample mapped to its affected genes.
  self.study_registry[:sample_genes] = Proc.new do |study, database|
    table = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID"], :type => :flat, :namespace => study.organism)
    study.samples.select_by(:has_genotype?).each { |sample| table[sample] = sample.affected_genes }
    table
  end

  # Double table: each genotyped sample mapped to its affected genes plus,
  # per gene, the entries found for it in the :mutation_genes database
  # (queried with "Ensembl Gene ID" as source) joined with ";;".
  self.study_registry[:sample_genes2] = Proc.new do |study, database|
    table = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "Genomic Mutation"], :type => :double, :namespace => study.organism)
    gene_index = study.knowledge_base.get_database(:mutation_genes, :source => "Ensembl Gene ID")
    study.samples.select_by(:has_genotype?).each do |sample|
      gene_rows = sample.affected_genes.collect { |gene| [gene, gene_index[gene] * ";;"] }
      table[sample] = Misc.zip_fields gene_rows
    end
    table
  end

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Mutation lists of a study, each labelled through `jobname` where the
# original code assigns one.
module Study
  # The cohort metagenotype, labelled "All mutations in <study>".
  property :all_mutations do
    everything = cohort.metagenotype
    everything.jobname = "All mutations in #{ self }"
    everything
  end

  # Subset of all_mutations for which `relevant?` holds.
  property :relevant_mutations do
    relevant = all_mutations.select_by(:relevant?)
    relevant.jobname = "Relevant mutations in #{ self }"
    relevant
  end

  # Subset of relevant_mutations for which `damaging?` holds; extra arguments
  # are forwarded to `damaging?` through `select_by`.
  property :damaging_mutations do |*args|
    damaging = relevant_mutations.select_by(:damaging?, *args)
    damaging.jobname = "Damaging mutations in #{ self }"
    damaging
  end

  # Relevant mutations having at least one mutated isoform whose consequence
  # is not "SYNONYMOUS"; mutations without isoform information are excluded.
  property :mutations_altering_isoform_sequence do
    altering = relevant_mutations.select do |mutation|
      isoforms = mutation.mutated_isoforms
      !isoforms.nil? && isoforms.select { |isoform| isoform.consequence != "SYNONYMOUS" }.any?
    end
    altering.jobname = "Mutations altering isoform sequence in #{ self }"
    altering
  end

  # Relevant mutations with at least one transcript whose splicing is affected.
  property :mutations_affecting_splicing_sites do
    splicing = relevant_mutations.select_by(:transcripts_with_affected_splicing) { |transcripts| transcripts.any? }
    splicing.jobname = "Mutations affecting splicing sites in #{ self }"
    splicing
  end

  # Mutations (among all_mutations) whose `genes` include the given gene.
  property :mutations_over_gene do |gene|
    all_mutations.select_by(:genes) { |hit_genes| hit_genes && hit_genes.include?(gene) }
  end

  # Mutations (among all_mutations) whose `genes` share an element with `list`.
  property :mutations_over_gene_list do |list|
    all_mutations.select_by(:genes) { |hit_genes| hit_genes && (hit_genes & list).any? }
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Genotype-related properties of the samples of a study.
module Sample
  # For a list of samples: one boolean per sample, true when the study cohort
  # returns a non-nil entry for it.
  property :has_genotype? => :array2single do
    entries = study.cohort.values_at(*self)
    entries.map { |entry| !entry.nil? }
  end

  # The cohort entry of this sample. Study.setup is applied to the study
  # before the cohort is queried.
  property :mutations do
    Study.setup(study)
    study.cohort[self]
  end

  # Mutations of the sample for which `relevant?` holds.
  property :relevant_mutations do
    mutations.select_by(:relevant?)
  end

  # Mutations of the sample for which `damaging?` holds; extra arguments are
  # forwarded through `select_by`.
  property :damaging_mutations do |*args|
    mutations.select_by(:damaging?, *args)
  end

  # Genes affected by the sample's mutations: nils dropped, flattened, unique.
  property :affected_genes do
    per_mutation = mutations.affected_genes
    per_mutation.compact.flatten.uniq
  end

  # Genes damaged by the sample's mutations: nils dropped, flattened, unique.
  # Extra arguments are forwarded to the mutations' `damaged_genes`.
  property :damaged_genes do |*args|
    per_mutation = mutations.damaged_genes(*args)
    per_mutation.compact.flatten.uniq
  end
end
|
27
|
+
|
28
|
+
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'rbbt/entity/genotype'
|
2
|
+
|
3
|
+
require 'rbbt/entity/study/genotypes/samples'
|
4
|
+
require 'rbbt/entity/study/genotypes/mutations'
|
5
|
+
require 'rbbt/entity/study/genotypes/genes'
|
6
|
+
require 'rbbt/entity/study/genotypes/enrichment'
|
7
|
+
require 'rbbt/entity/study/genotypes/knowledge_base'
|
8
|
+
|
9
|
+
Workflow.require_workflow "NKIWorkflow"
|
10
|
+
Workflow.require_workflow "TSVWorkflow"
|
11
|
+
|
12
|
+
module StudyWorkflow
  # Organism recorded in the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  # Builds a :double TSV keyed by "Ensembl Gene ID" with, for every gene
  # reported for some mutation of the study:
  #   * the samples with the gene mutated,
  #   * the samples with the gene affected,
  #   * the samples with the gene damaged (left empty when the study has
  #     5000 mutations or more, since damaged genes are not computed then),
  #   * the "p.value" from NKIWorkflow's :significantly_mutated job, or
  #     "> 0.1" when the gene is not in that result.
  # Returns the empty table when the study has no mutations.
  task :genotype_overview => :tsv do
    gene_overview = TSV.setup({},
                              :key_field => "Ensembl Gene ID",
                              :fields => ["Samples with gene mutated", "Samples with gene affected", "Samples with gene damaged", "Mutation significance"],
                              :type => :double
                             )
    genotyped_samples = study.samples.select_by(:has_genotype?)
    all_mutations = study.all_mutations
    if all_mutations.empty?
      gene_overview
    else

      log :affected_genes, "Computing how genes are affected by mutations"
      mutation_genes = Misc.process_to_hash(all_mutations){|list| list.genes}
      mutation_affected_genes = Misc.process_to_hash(all_mutations){|list| list.affected_genes}
      if all_mutations.length < 5000
        log :damaged_genes, "Computing genes damaged genes"
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|list| list.damaged_genes}
      else
        # Too many mutations: skip the damage computation and record nil for
        # every mutation instead.
        mutation_damaged_genes = Misc.process_to_hash(all_mutations){|list| [nil] * list.length}
      end
      log :significance, "Computing mutation significance"
      mutation_significance = NKIWorkflow.job(:significantly_mutated, study, :study => study, :threshold => 0.1).run
      log :significance, "Reordering mutation significance file"

      #TSVWorkflow.job(:change_id, study, :format => "Ensembl Gene ID", :tsv => mutation_significance).run
      mutation_significance.identifiers = Organism.identifiers(study.organism)
      mutation_significance = mutation_significance.change_key "Ensembl Gene ID"

      log :samples, "Gathering affected samples"
      # Slot 0: mutated, slot 1: affected, slot 2: damaged.
      gene_maps = [mutation_genes, mutation_affected_genes, mutation_damaged_genes]
      samples_gene_status = {}
      genotyped_samples.each do |sample|
        status = samples_gene_status[sample] = {}
        sample_mutations = sample.mutations

        gene_maps.each_with_index do |gene_map, slot|
          gene_map.values_at(*sample_mutations).each do |genes|
            # A mutation may have no gene list at all (the compile step below
            # compacts such nils too), so skip it in every slot instead of
            # failing on nil.each.
            next if genes.nil?
            genes.each do |gene|
              status[gene] ||= [false, false, false]
              status[gene][slot] = true
            end
          end
        end
      end

      log :compiling, "Compiling result"
      mutation_genes.values.compact.flatten.uniq.each do |gene|
        sample_lists = (0..2).collect do |slot|
          samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][slot]}.collect{|sample, gene_status| sample}
        end
        significance = mutation_significance.include?(gene) ? mutation_significance[gene]["p.value"] : "> 0.1"
        gene_overview[gene] = sample_lists << [significance]
      end

      gene_overview
    end
  end
end
|
87
|
+
|
88
|
+
# Genotype access for a study: genotype files and the cohort built from them.
module Study
  # Whether `dir.genotypes` exists for this study.
  def has_genotypes?
    dir.genotypes.exists?
  end

  # Only the writer is generated; the reader is hand-written below so it can
  # fall back to the study metadata (generating both and then redefining the
  # reader triggers Ruby's "method redefined" warning).
  attr_writer :watson

  # The explicitly assigned value when there is one (including false),
  # otherwise metadata[:watson], which is then remembered.
  def watson
    @watson = metadata[:watson] if @watson.nil?
    @watson
  end

  # Every entry found directly under `dir.genotypes`.
  def genotype_files
    dir.genotypes.glob("*")
  end

  # One GenomicMutation list per genotype file, built from the file's sorted
  # lines and set up with the file basename, the organism and the watson
  # value. The resulting array is extended with Genotype::Cohort and memoized.
  def cohort
    @cohort ||= genotype_files.collect do |f|
      name = File.basename(f)
      genomic_mutations = Open.read(f).split("\n").sort
      GenomicMutation.setup(genomic_mutations, name, organism, watson)
    end.tap{|cohort| cohort.extend Genotype::Cohort}
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Knowledge-base support for studies: a module-level registry of databases
# and a per-study KnowledgeBase that inherits every registered database.
module Study

  class << self
    # Writers only; the readers below lazily create their defaults
    # (attr_accessor followed by a redefinition of the reader triggers Ruby's
    # "method redefined" warning).
    attr_writer :knowledge_base, :study_registry

    # Module-wide knowledge base, created on first use.
    def knowledge_base
      @knowledge_base ||= KnowledgeBase.new Rbbt.var.knowledge_base.Study
    end

    # Hash of database name => file or Proc. Procs are invoked as
    # proc.call(study, database) by Study#knowledge_base.
    def study_registry
      @study_registry ||= {}
    end
  end

  attr_writer :knowledge_base

  # Knowledge base of this study, created on first use under
  # dir.var.knowledge_base and memoized. Every entry of Study.study_registry
  # is registered in it: Proc entries are wrapped in a block bound to this
  # study, anything else is registered as given.
  def knowledge_base
    @knowledge_base ||= begin
                          kb = KnowledgeBase.new(self.dir.var.knowledge_base, self.organism)
                          kb.format["Gene"] = "Ensembl Gene ID"
                          kb.entity_options["Sample"] = {"Study" => self}
                          # Capture the study in a local: the wrapper block
                          # must keep referring to it even if the knowledge
                          # base evaluates the block with a different self.
                          study = self
                          Study.study_registry.each do |database, file|
                            Log.debug("Inheriting #{ database } from registry: #{Misc.fingerprint file}")
                            if Proc === file
                              block = Proc.new{ file.call(study, database) }
                              # Gives the block a "database@study" filename.
                              block.define_singleton_method(:filename) do [database, study] * "@" end
                              kb.register database, nil, {}, &block
                            else
                              kb.register database, file
                            end
                          end
                          kb
                        end
  end
end
|
36
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Methylation-related properties of the samples of a study.
module Sample
  # Returns the study's whole methylation cohort (a Hash keyed by sample).
  # NOTE(review): presumably the :array2single machinery picks each sample's
  # entry out of this Hash -- confirm against the Entity property code.
  property :methylation => :array2single do
    study.methylation_cohort
  end

  # For a list of samples: one boolean per sample, true when the methylation
  # cohort has a non-nil entry for it.
  property :has_methylation? => :array2single do
    entries = study.methylation_cohort.values_at(*self)
    entries.map { |entry| !entry.nil? }
  end

  # Methylation entries of the sample for which `methylated?` holds;
  # [] when the sample has no entries.
  property :methylated => :single do
    methylation.empty? ? [] : methylation.select_by(:methylated?)
  end

  # Methylation entries of the sample for which `unmethylated?` holds;
  # [] when the sample has no entries.
  property :unmethylated => :single do
    methylation.empty? ? [] : methylation.select_by(:unmethylated?)
  end

  # Unique genes of the methylated entries as "Ensembl Gene ID" Gene
  # entities; a plain [] when nothing is methylated.
  property :methylated_genes => :single do
    if methylated.empty?
      []
    else
      gene_ids = methylated.genes.flatten.uniq
      Gene.setup(gene_ids, "Ensembl Gene ID", organism)
    end
  end

  # Unique genes of the unmethylated entries as "Ensembl Gene ID" Gene
  # entities; a plain [] when nothing is unmethylated.
  property :unmethylated_genes => :single do
    if unmethylated.empty?
      []
    else
      gene_ids = unmethylated.genes.flatten.uniq
      Gene.setup(gene_ids, "Ensembl Gene ID", organism)
    end
  end
end
|
30
|
+
|
31
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'rbbt/entity/methylation'
|
2
|
+
|
3
|
+
require 'rbbt/entity/study/methylation/samples'
|
4
|
+
|
5
|
+
module StudyWorkflow
  # Organism recorded in the study metadata.
  helper(:organism) { study.metadata[:organism] }
end
|
10
|
+
|
11
|
+
# Methylation access for a study: methylation files and the cohort built
# from them.
module Study
  # Whether `dir.methylation` exists for this study.
  def has_methylation?
    dir.methylation.exists?
  end

  # Every entry found directly under the located `dir.methylation`.
  def methylation_files
    dir.methylation.find.glob("*")
  end

  # Hash from sample name (the file basename, set up as a Sample of this
  # study) to the sorted lines of its methylation file, set up as Methylation
  # for the study organism.
  #
  # The hash is memoized only once every file has been loaded, so a failure
  # while reading one file does not leave a partial cohort cached.
  def methylation_cohort
    @methylation_cohort ||= begin
                              loaded = {}
                              methylation_files.each do |f|
                                sample = File.basename(f)
                                Sample.setup(sample, self)
                                methylations = Open.read(f).split("\n").sort
                                Methylation.setup(methylations, organism)
                                loaded[sample] = methylations
                              end
                              loaded
                            end
  end
end
|
34
|
+
|
35
|
+
# Cohort-level summaries over the samples that have methylation data.
module Study
  # Genes appearing in the `lost_genes` of at least `threshold` samples that
  # have methylation data, as "Ensembl Gene ID" Gene entities.
  # `threshold` defaults to 2 when omitted, mirroring recurrent_genes.
  # NOTE(review): `lost_genes` is not among the methylation sample properties
  # visible alongside this code -- confirm it is the intended property here.
  property :recurrently_lost_genes => :single do |threshold|
    threshold = 2 if threshold.nil?
    counts = {}
    self.samples.each do |sample|
      next unless sample.has_methylation?

      sample.lost_genes.clean_annotations.each do |gene|
        counts[gene] ||= 0
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # Genes appearing in the `gained_genes` of at least `threshold` samples that
  # have methylation data, as "Ensembl Gene ID" Gene entities.
  # `threshold` defaults to 2 when omitted, mirroring recurrent_genes.
  # NOTE(review): `gained_genes` is not among the methylation sample
  # properties visible alongside this code -- confirm it is the intended
  # property here.
  property :recurrently_gained_genes => :single do |threshold|
    threshold = 2 if threshold.nil?
    counts = {}
    self.samples.each do |sample|
      next unless sample.has_methylation?

      sample.gained_genes.clean_annotations.each do |gene|
        counts[gene] ||= 0
        counts[gene] += 1
      end
    end

    recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # TSV keyed by "Ensembl Gene ID" with one column per sample of the
  # methylation cohort, in cohort order. A cell is "TRUE" when the gene is
  # among the genes of that sample's methylation entries, "FALSE" otherwise.
  property :gene_sample_methylation_matrix => :single do
    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
    methylation_cohort = self.methylation_cohort
    # Rows must be as wide as the number of columns, i.e. the number of
    # samples in the methylation cohort (not the genotype cohort).
    num_samples = methylation_cohort.length
    sample_names = []
    i = 0
    methylation_cohort.each do |sample,methylation|
      methylation.genes.compact.flatten.uniq.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][i] = "TRUE"
      end
      sample_names << sample
      i += 1
    end

    tsv.fields = sample_names

    tsv
  end
end
|