rbbt-study 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/lib/rbbt/entity/study/cnv/genes.rb +28 -0
- data/lib/rbbt/entity/study/cnv/samples.rb +55 -0
- data/lib/rbbt/entity/study/cnv.rb +170 -0
- data/lib/rbbt/entity/study/enrichment.rb +418 -0
- data/lib/rbbt/entity/study/expression.rb +19 -0
- data/lib/rbbt/entity/study/features.rb +17 -0
- data/lib/rbbt/entity/study/genes.rb +104 -0
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +56 -0
- data/lib/rbbt/entity/study/genotypes/genes.rb +103 -0
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +39 -0
- data/lib/rbbt/entity/study/genotypes/mutations.rb +34 -0
- data/lib/rbbt/entity/study/genotypes/samples.rb +28 -0
- data/lib/rbbt/entity/study/genotypes.rb +110 -0
- data/lib/rbbt/entity/study/knowledge_base.rb +36 -0
- data/lib/rbbt/entity/study/methylation/samples.rb +31 -0
- data/lib/rbbt/entity/study/methylation.rb +90 -0
- data/lib/rbbt/entity/study/mutations.rb +259 -0
- data/lib/rbbt/entity/study/plots.rb +142 -0
- data/lib/rbbt/entity/study/samples.rb +61 -0
- data/lib/rbbt/entity/study/snp.rb +87 -0
- data/lib/rbbt/entity/study.rb +151 -0
- metadata +69 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
# Groups the mutations of every genotype in the study cohort by their base
# change, keyed as "REFERENCE>BASE".
#
# The reference base is taken from the mutation itself when `watson` is set and
# from the gene strand otherwise. The alternative base is expanded through
# Misc::IUPAC2BASE (unknown codes expand to nothing) and the reference is
# removed from the expansion; the remaining bases are joined with ",".
#
# Returns a flat TSV: "Genomic Change" => list of "Genomic Mutation" values.
task :mutations_by_change => :tsv do
  changes = {}

  study.cohort.each do |genotype|
    genotype.watson ||= watson
    genotype.each do |mutation|
      reference = watson ? mutation.reference : mutation.gene_strand_reference
      base = mutation.base
      base = ((Misc::IUPAC2BASE[base] || []) - [reference]) * ","
      change = [reference, base] * ">"
      changes[change] ||= []
      changes[change] << mutation.clean_annotations
    end
  end

  TSV.setup(changes, :key_field => "Genomic Change", :fields => ["Genomic Mutation"], :namespace => organism, :type => :flat)

  changes.entity_options = {:watson => watson}

  changes
end
|
22
|
+
|
23
|
+
|
24
|
+
# Counts how many mutations fall under each genomic change reported by the
# :mutations_by_change dependency.
#
# Returns a Hash of change => number of mutations, stored as YAML.
dep :mutations_by_change
task :mutation_change_counts => :yaml do
  change_counts = {}

  step(:mutations_by_change).load.each do |change, mutations|
    change_counts[change] = mutations.length
  end

  change_counts
end
|
34
|
+
|
35
|
+
# Collects, across every genotype in the cohort, the mutations whose type is
# "transversion" and sets them up as a GenomicMutation list.
returns "Genomic Mutation"
task :transversions => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      mutation.type == "transversion"
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: transversions", organism, watson)
end
|
51
|
+
|
52
|
+
# Collects, across every genotype in the cohort, the mutations whose type is
# "transition" and sets them up as a GenomicMutation list.
returns "Genomic Mutation"
task :transitions => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      mutation.type == "transition"
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: transitions", organism, watson)
end
|
68
|
+
|
69
|
+
# Collects, across every genotype in the cohort, the mutations whose type is
# "indel" and sets them up as a GenomicMutation list.
returns "Genomic Mutation"
task :indels => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      mutation.type == "indel"
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: indels", organism, watson)
end
|
84
|
+
|
85
|
+
# Collects, across every genotype in the cohort, the mutations whose type is
# "unknown" and sets them up as a GenomicMutation list.
returns "Genomic Mutation"
task :unknown_mutations => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      mutation.type == "unknown"
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: unknown_mutations", organism, watson)
end
|
100
|
+
|
101
|
+
|
102
|
+
# Collects, across every genotype in the cohort, the entries whose type is
# "none" and sets them up as a GenomicMutation list.
returns "Genomic Mutation"
task :not_mutations => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      mutation.type == "none"
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: not mutations", organism, watson)
end
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
# Collects the cohort mutations that have at least one mutated isoform flagged
# as non-synonymous. Mutations without mutated isoforms (nil) are treated as
# having none and are therefore left out.
returns "Genomic Mutation"
task :non_synonymous_mutations => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation|
      # select + any? is kept as written so the isoform list's own `select`
      # is the method that iterates the elements.
      (mutation.mutated_isoforms || []).select{|mi| mi.non_synonymous }.any?
    }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: non_synonymous mutations", organism, watson)
end
|
135
|
+
|
136
|
+
# Collects the cohort mutations that remain after removing, from each genotype,
# everything reported by the :non_synonymous_mutations dependency.
dep :non_synonymous_mutations
returns "Genomic Mutation"
task :synonymous_mutations => :annotations do
  non_synonymous_mutations = step(:non_synonymous_mutations).load

  mutations = study.cohort.collect{|genotype|
    genotype.remove(non_synonymous_mutations)
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: synonymous mutations", organism, watson)
end
|
149
|
+
|
150
|
+
#dep :synonymous_mutations
|
151
|
+
#dep :exon_junction_mutations
|
152
|
+
#input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
|
153
|
+
#returns "Genomic Mutation"
|
154
|
+
#task :damaging_mutations => :annotations do |methods|
|
155
|
+
# synonymous_mutations = step(:synonymous_mutations).load
|
156
|
+
# exon_junction_mutations = step(:exon_junction_mutations).load
|
157
|
+
#
|
158
|
+
# mutations_to_remove = synonymous_mutations - exon_junction_mutations
|
159
|
+
#
|
160
|
+
# mutations = study.cohort.collect{|genotype|
|
161
|
+
#
|
162
|
+
# genotype.remove( mutations_to_remove ).select{|mutation| mutation.damaging?(methods) }
|
163
|
+
#
|
164
|
+
# }.flatten
|
165
|
+
#
|
166
|
+
# GenomicMutation.setup(mutations, "#{ study }: damaging mutations", organism, watson)
|
167
|
+
#end
|
168
|
+
|
169
|
+
# Selects, from the :relevant_mutations dependency, the mutations for which
# `damaging?(methods)` holds.
#
# Input:
#   methods - damage prediction methods handed to `damaging?`
#             (defaults to [:sift, :mutation_assessor]).
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift, :mutation_assessor]
returns "Genomic Mutation"
task :damaging_mutations => :annotations do |methods|
  relevant_mutations = step(:relevant_mutations).load

  mutations = relevant_mutations.select{|mutation| mutation.damaging?(methods) }

  GenomicMutation.setup(mutations, "#{ study }: damaging mutations", organism, watson)
end
|
179
|
+
|
180
|
+
|
181
|
+
# Reports relevant mutations that were not classified as damaging and for which
# no mutated isoform carries damage scores.
#
# A mutation is reported when it has at least one mutated isoform and every one
# of its mutated isoforms has nil `damage_scores`.
#
# NOTE(review): the `methods` input is declared and received by the block but
# never used in the body; the :damaging_mutations dependency is loaded as-is.
# Confirm whether it should be forwarded to that dependency.
dep :damaging_mutations
dep :relevant_mutations
input :methods, :array, "Damage prediction methods", [:sift]
returns "Genomic Mutation"
task :mutations_missing_predictions => :annotations do |methods|
  damaging_mutations = step(:damaging_mutations).load
  relevant_mutations = step(:relevant_mutations).load

  missing_mutations = relevant_mutations.remove(damaging_mutations)
  missing_mutations_mutated_isoforms = missing_mutations.mutated_isoforms.compact.flatten
  mutated_isoforms_missing_damage_scores = missing_mutations_mutated_isoforms.select{|mis| mis.damage_scores.nil? }

  # Only mutations that actually have isoforms can be "missing" a prediction.
  mutations_with_isoforms = missing_mutations.select{|mutation|
    mutation.mutated_isoforms and mutation.mutated_isoforms.any?
  }

  # Keep those where nothing is left once the unscored isoforms are removed.
  mutations_missing_predictions = mutations_with_isoforms.select{|mutation|
    mutation.mutated_isoforms.remove(mutated_isoforms_missing_damage_scores).empty?
  }

  GenomicMutation.setup(mutations_missing_predictions, "#{ study }: mutations missing predictions", organism, watson)
end
|
195
|
+
|
196
|
+
# Collects the cohort mutations that affect splicing in at least one transcript
# and whose type is not "none".
returns "Genomic Mutation"
task :exon_junction_mutations => :annotations do
  mutations = study.cohort.collect{|genotype|
    genotype.select{|mutation| mutation.transcripts_with_affected_splicing.any? and not mutation.type == "none" }
  }.flatten

  GenomicMutation.setup(mutations, "#{ study }: exon junction mutations", organism, watson)
end
|
207
|
+
|
208
|
+
# Combines the exon junction mutations with the non-synonymous mutations.
# Exon junction mutations are removed from the non-synonymous list before the
# two lists are concatenated.
dep :non_synonymous_mutations
dep :exon_junction_mutations
returns "Genomic Mutation"
task :relevant_mutations => :annotations do
  non_synonymous_mutations = step(:non_synonymous_mutations).load
  exon_junction_mutations = step(:exon_junction_mutations).load

  all_relevant_mutations = (exon_junction_mutations + non_synonymous_mutations.remove(exon_junction_mutations)).flatten

  GenomicMutation.setup(all_relevant_mutations, "#{ study }: relevant mutations", organism, watson)
end
|
219
|
+
|
220
|
+
# Selects the relevant mutations that appear more than once in the list loaded
# from :relevant_mutations, counted with Misc.counts after `remove_score`.
dep :relevant_mutations
returns "Genomic Mutation"
task :recurrent_mutations => :annotations do
  relevant_mutations = step(:relevant_mutations).load

  mutations = Misc.counts(relevant_mutations.remove_score).select{|mutation, count|
    count > 1
  }.collect{|mutation, count| mutation }

  GenomicMutation.setup(mutations, "#{study}: recurrent mutations", organism, watson)
end
|
233
|
+
|
234
|
+
# Indexes the non-synonymous mutations of each genotype by the consequences of
# their mutated isoforms.
#
# Returns a Hash of consequence => list of mutations, stored as YAML. A
# mutation is listed once per distinct non-nil consequence it has; mutations
# without mutated isoforms are skipped.
dep :non_synonymous_mutations
task :mutations_by_consequence => :yaml do
  non_synonymous_mutations = step(:non_synonymous_mutations).load

  mutations_by_consequence = {}
  study.cohort.each do |genotype|
    genotype.subset(non_synonymous_mutations).each do |mutation|
      mis = mutation.mutated_isoforms
      next if mis.nil?
      consequences = mis.consequence.compact.uniq
      consequences.each do |consequence|
        mutations_by_consequence[consequence] ||= []
        mutations_by_consequence[consequence] << mutation
      end
    end
  end

  mutations_by_consequence
end
|
250
|
+
# Defines one task per consequence label. Each task loads the
# :mutations_by_consequence index and returns the mutations listed under its
# label, or an empty list when the label is absent.
%w(missense_mutations nonsense_mutations frameshift_mutations nostop_mutations indel_mutations utr_mutations).zip(
  %w(MISS-SENSE NONSENSE FRAMESHIFT NOSTOP INDEL UTR)).each do |task_name, consequence|
  dep :mutations_by_consequence
  returns "Genomic Mutation"
  task task_name => :annotations do
    mutations_by_consequence = step(:mutations_by_consequence).load
    GenomicMutation.setup(mutations_by_consequence[consequence] || [], "#{study}: mutations with #{consequence.downcase} isoform mutations", organism, watson)
  end
end
|
259
|
+
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# Renders the gene mutation plot of the study with R/ggplot2 and returns the
# PNG contents.
#
# Inputs:
#   cutoff - value interpolated as the `cutoff` argument of
#            rbbt.SE.plot.mutations.
#   size   - value interpolated as both height and width of ggsave.
#
# The image is written to the job file "<study>.png" and read back in binary
# mode.
input :cutoff, :integer, "Cutoff passed to rbbt.SE.plot.mutations", 2
input :size, :integer, "Pixels of image", 14
task :gene_mutation_plot => :binary do |cutoff, size|
  png_file = file(study + ".png")
  # mkdir_p is a no-op for existing directories, so no existence check is
  # needed (File.exists? is gone from current Ruby versions).
  FileUtils.mkdir_p File.dirname(png_file)
  study.R "
  library(ggplot2)
  library(plyr)
  library(reshape)

  layer.mutations = rbbt.SE.plot.mutations('#{study}', cutoff=#{cutoff});
  p <- ggplot() + layer.mutations

  p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

  ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "
  Open.read(png_file, :mode => 'rb')
end
|
20
|
+
|
21
|
+
|
22
|
+
# Renders a sample-by-pathway mutation tile plot with R/ggplot2 and returns the
# PNG contents.
#
# Inputs:
#   database - database code interpolated into the :mutation_pathway_enrichment
#              job that the embedded Ruby snippet runs.
#   size     - value interpolated as both height and width of ggsave.
#
# The R script loads the per-sample mutated genes, obtains enriched pathways
# (p-value < 0.2, excluding names matching a list of disease terms) through an
# embedded Ruby snippet, marks which sample/pathway tiles are mutated, and
# flags tiles as "Exclusive" when the sample has pathway genes not already
# seen in an earlier pathway. The image is written to the job file
# "<study>.png" and read back in binary mode.
input :database, :string, "Database code", :kegg
input :size, :integer, "Pixels of image", 14
task :pathway_mutation_plot => :binary do |database,size|
  png_file = file(study + ".png")
  # mkdir_p is a no-op for existing directories, so no existence check is
  # needed (File.exists? is gone from current Ruby versions).
  FileUtils.mkdir_p File.dirname(png_file)
  study.R "
  library(ggplot2)
  library(plyr)
  library(reshape)


  study = '#{study}'
  # Sample mutations
  sample.mutated.genes = rbbt.SE.sample.mutated.genes(study)
  sample.mutated.genes$Sample = rownames(sample.mutated.genes)

  # Pathway enrichment
  pathway.enrichment = rbbt.ruby.substitutions(
  \"
  require 'rbbt/workflow'
  require 'rbbt/entity'
  require 'rbbt/entity/gene'
  require 'rbbt/sources/pfam'
  require 'rbbt/sources/kegg'
  require 'rbbt/sources/go'

  YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler

  Workflow.require_workflow 'StudyExplorer'

  study = Study.setup('STUDY')

  Log.severity = 0

  pathways = study.job(:mutation_pathway_enrichment, study, :baseline => :pathway_base_counts, :database => '#{database}', :fdr => false).run.select('p-value'){|pvalue| pvalue = pvalue.first.to_f if Array === pvalue; pvalue < 0.2}
  pathways.add_field 'Name' do |pathway, values|
  [pathway.name]
  end

  pathways.add_field 'Gene' do |pathway, values|
  values['Ensembl Gene ID'].name
  end

  pathways = pathways.select('Name'){|name| name.first.to_s !~ /cancer|olfactory|glioma|melanoma|malaria|leukemia|carcinoma|sarcoma/i}

  \", substitutions=list(STUDY=study));


  # Sample pathway mutations
  find.mutated.pathways.for.sample <- function(x, pathway.info){
  all.genes = names(x);
  genes = all.genes[x==TRUE];
  ddply(pathway.info, 'Name', function(x){pathway.genes = unlist(strsplit(x$Gene, '\\\\|')); if (length(intersect(genes, pathway.genes)) > 0){TRUE}else{FALSE}})
  }
  sample.pathway.mutations = ddply(sample.mutated.genes, 'Sample', find.mutated.pathways.for.sample, pathway.info = pathway.enrichment)
  names(sample.pathway.mutations) = c('Sample', 'Pathway', 'Mutated')

  p <- ggplot(sample.pathway.mutations) + geom_tile(aes(x=Sample, y=Pathway, alpha=Mutated))

  p <- rbbt.SE.plot.sort.by.pathway.mutations(p)


  # Mark repeated genes


  d = p$data
  d$Exclusive = FALSE

  pathway.genes = list();
  for(pathway in levels(d$Pathway)){
  pathway.genes[pathway] = strsplit(pathway.enrichment[pathway.enrichment[,'Name'] == pathway, 'Gene'], '\\\\|')
  }

  find.exclusive.pathway.genes <- function(data, pathways){
  found.genes = c();
  exclusive.pathway.genes = list();
  sample = as.character(unique(data$Sample));
  for(pathway in pathways){
  current.pathway.genes = pathway.genes[[pathway]];
  sample.genes = names(sample.mutated.genes)[sample.mutated.genes[sample,] == TRUE]
  sample.pathway.genes = intersect(current.pathway.genes, sample.genes);
  exclusive.genes = setdiff(sample.pathway.genes, found.genes);
  found.genes = c(found.genes, exclusive.genes)
  exclusive.pathway.genes[[pathway]] = exclusive.genes
  }

  return(exclusive.pathway.genes);
  }

  exclusive.pathway.genes = dlply(d, 'Sample', find.exclusive.pathway.genes, pathways = levels(d$Pathway))

  for( sample in names(exclusive.pathway.genes)){
  pathway.exclusive.genes = exclusive.pathway.genes[[sample]];
  for( pathway in names(pathway.exclusive.genes)){
  if (length(pathway.exclusive.genes[[pathway]]) > 0){
  print(sample)
  print(pathway)
  d[(d$Sample == sample & d$Pathway == pathway), 'Exclusive'] = TRUE
  }
  }
  }

  p$data = d


  p <- p + aes(fill=Exclusive)

  p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

  p




  ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "
  Open.read(png_file, :mode => 'rb')
end
|
140
|
+
|
141
|
+
|
142
|
+
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Entity for a sample, annotated with the study it belongs to.
module Sample
  extend Entity

  annotation :study

  self.format = ["Sample ID"]

  # Directory of the sample's study.
  #
  # Returns nil when there is no study. If the study annotation does not
  # respond to `dir` it is set up as a Study first; any error raised while
  # doing so is logged as a warning and nil is returned.
  def dir
    return nil if study.nil?
    return study.dir if study.respond_to? :dir
    begin
      Study.setup(study).dir
    rescue
      Log.warn "Error accessing sample dir from study: #{$!.message}"
      nil
    end
  end

  # Organism of the sample's study, or nil when there is no study.
  def organism
    return nil if study.nil?
    study.organism
  end
end
|
24
|
+
|
25
|
+
# Sample-related helpers for Study.
module Study

  # TSV loaded from the study's `samples` file, with the study attached as an
  # entity option. Returns nil when the file does not exist. The loaded TSV is
  # memoized.
  def sample_info
    return nil unless dir.samples.exists?
    @sample_info ||= dir.samples.tsv.tap{|tsv| tsv.entity_options = {:study => self }}
  end

  # Samples of the study, set up as Sample entities (memoized).
  #
  # Uses the keys of `sample_info` when available; otherwise falls back to the
  # job names of the genotypes in the cohort.
  def samples
    if @samples.nil?
      info = sample_info
      @samples = info.nil? ? self.cohort.collect{|g| g.jobname } : info.keys
      Sample.setup(@samples, self)
      @samples.study = self
    end
    @samples
  end

  # NOTE(review): this method and has_mutations? call `study`, which is not
  # defined in this module; they look like they were written for Sample.
  # Confirm where `study` is expected to come from.
  def has_cnv?
    study.has_cnv? and study.cnv_cohort.include? self
  end

  def has_mutations?
    study.cohort and study.cohort.include? self
  end

  # Extends `list` with the samples that share a donor with the listed ones.
  #
  # The donor field is the first `sample_info` field matching /donor\s+id/i.
  # Returns `list` unchanged when the study has no sample information or no
  # such field.
  def match_samples(list)
    info = sample_info
    # Without a samples file there is nothing to match against.
    return list if info.nil?

    donor_id_field = info.fields.select{|f| f =~ /donor\s+id/i }.first
    return list if donor_id_field.nil?

    list_donors = info.select(list).slice(donor_id_field).values.compact.flatten
    list_donor_samples = info.select(list_donors).keys
    list_donor_samples.annotate((list + list_donor_samples).uniq)
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'rbbt/entity/snp'
|
2
|
+
|
3
|
+
#require 'rbbt/entity/study/snp/samples'
|
4
|
+
|
5
|
+
# Opens the StudyWorkflow module without adding anything to it, so the
# constant exists when this file is loaded.
module StudyWorkflow
end
|
7
|
+
|
8
|
+
# SNP file access for Study.
module Study
  # True when the study directory has an `snp` entry.
  def has_snp?
    dir.snp.exists?
  end

  # Files found directly under the study's `snp` directory (memoized).
  def snp_files
    @snp_files ||= dir.snp.find.glob("*")
  end

  # Hash of sample => sorted list of SNPs, one entry per SNP file (memoized).
  #
  # The sample name is the basename of the file and each line of the file is
  # one SNP. Samples are set up as Sample entities of this study and the lists
  # as SNP entities.
  def snp_cohort
    if @snp_cohort.nil?
      @snp_cohort = {}
      snp_files.each do |f|
        sample = File.basename(f)
        Sample.setup(sample, self)
        snps = Open.read(f).split("\n").sort
        SNP.setup(snps)
        @snp_cohort[sample] = snps
      end
    end
    @snp_cohort
  end
end
|
31
|
+
|
32
|
+
# SNP index and SNP-to-sample lookups for Study.
module Study

  # Persisted flat TSV mapping "RS ID" => samples carrying that SNP.
  #
  # Every line of every SNP file is read as "SNP" or "SNP:ALLELE". While the
  # index is built each SNP maps to a tab-separated string of "SAMPLE" or
  # "SAMPLE:ALLELE" entries, where the sample is the file's basename.
  def snp_index
    local_persist_tsv("SNP2Samples", "SNP2Samples", {}, :persist => true, :serializer => :clean) do |data|

      require 'progress-monitor'
      Progress.monitor "SNP files", :stack_depth => 0
      snp_files.each do |file|
        file = file.to_s
        sample = File.basename file
        File.open(file) do |f|
          while line = f.gets
            snp, allele = line.strip.split ":"
            # A blank line splits into nothing; skip it rather than index nil.
            next if snp.nil?

            snp_str = data[snp]
            snp_str = snp_str.nil? ? "" : snp_str + "\t"

            snp_str << sample
            snp_str << ":" << allele if allele

            data[snp] = snp_str
          end
        end
      end

      TSV.setup data
      data.key_field = "RS ID"
      data.fields = ["Sample"]
      data.type = :flat
      data.serializer = :list
      data
    end
  end

  # Samples listed for the SNP in the index, with any ":ALLELE" suffix dropped.
  property :samples_with_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":").first }, self)
  end

  # Samples whose index entry for the SNP carries the value "2".
  property :samples_with_homozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":") }.select{|s,g| g == "2" }.collect{|s,g| s }, self)
  end

  # Samples whose index entry for the SNP carries the value "1".
  property :samples_with_heterozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":") }.select{|s,g| g == "1" }.collect{|s,g| s }, self)
  end

end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
|
4
|
+
require 'rbbt/entity'
|
5
|
+
require 'rbbt/resource'
|
6
|
+
require 'rbbt/workflow'
|
7
|
+
|
8
|
+
Workflow.require_workflow "Genomics"
|
9
|
+
|
10
|
+
require 'rbbt/entity/study'
|
11
|
+
require 'rbbt/entity/study/knowledge_base'
|
12
|
+
require 'rbbt/entity/study/samples'
|
13
|
+
require 'rbbt/expression/matrix'
|
14
|
+
|
15
|
+
# Workflow template for studies. It carries a module-level `study` attribute
# and copies it into every job it creates.
module StudyWorkflow
  extend Workflow

  class << self
    attr_accessor :study
  end

  # Job directory for study workflows (memoized).
  def self.workdir
    @workdir ||= Rbbt.var.jobs["Study"].find
  end

  # The study stored in the @study instance variable of the job.
  helper :study do
    @study
  end

  # Directory of the job's study.
  helper :dir do
    study.dir
  end

  # Organism taken from the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  # Builds the job as usual and then copies this workflow's @study into the
  # resulting step, which is what the :study helper reads.
  def self.job(*args)
    super(*args).tap{|s| s.instance_variable_set("@study", @study) }
  end
end
|
42
|
+
|
43
|
+
# Entity for a study: a named directory under Study.study_dir holding
# metadata, matrices and other study data, with its own workflow copy.
module Study
  extend Entity
  extend Resource
  include LocalPersist

  class << self
    # Only the writer is generated; the reader below supplies the default.
    attr_writer :study_dir

    # Base directory for studies (memoized). Resolution order:
    #   1. "$HOME/.studies" when Rbbt is not defined,
    #   2. the contents of Rbbt.etc.study_dir when that file exists,
    #   3. Rbbt.studies.find otherwise.
    def study_dir
      @study_dir ||= begin
                       case
                       when (not defined?(Rbbt))
                         File.join(ENV["HOME"], '.studies')
                       when Rbbt.etc.study_dir.exists?
                         Rbbt.etc.study_dir.read.chomp
                       else
                         Rbbt.studies.find
                       end
                     end
    end
  end

  # Readers for both attributes are defined explicitly further down.
  attr_writer :workflow, :dir

  # Creates a job of the study workflow.
  #
  # task - task name.
  # args - optional job name followed by an optional inputs Hash; a single
  #        Hash argument is taken as the inputs. A missing name, :self or
  #        "self" means the study itself is used as the name.
  #
  # The study's :organism and :watson metadata are supplied as default inputs
  # and overridden by the given inputs. The study is stored in the step's
  # @study instance variable.
  def job(task, *args)
    name, inputs = args
    if inputs.nil? and Hash === name
      inputs = name
      name = nil
    end
    name = self if name.nil? or name == :self or name == "self"
    step = workflow.job(task, name, {:organism => metadata[:organism], :watson => metadata[:watson]}.merge(inputs || {}))
    step.instance_variable_set(:@study, self)
    step
  end

  # Returns the study workflow or, when a block is given, evaluates the block
  # in the context of the workflow.
  def workflow(&block)
    if block_given?
      @workflow.instance_eval(&block)
    else
      @workflow
    end
  end

  # Location of the annotation repository (memoized).
  def self.annotation_repo
    @annotation_repo ||= Rbbt.var.cache.annotation_repo.find
  end

  # Hook run when an object is extended with Study: gives it a clone of
  # StudyWorkflow bound to the study, evaluates the study's setup.rb (when
  # present) in the context of the study, and points its local persistence
  # directory inside the study directory.
  def self.extended(base)
    setup_file = File.join(base.dir, 'setup.rb')
    base.workflow = StudyWorkflow.clone
    base.workflow.study = base
    if File.exist? setup_file
      base.instance_eval Open.read(setup_file), setup_file
    end
    base.local_persist_dir = base.dir.var.cache.persistence.find
  end

  # All studies: one Study per sub-directory of study_dir, sorted by path.
  def self.studies
    Dir.glob(File.join(Path === study_dir ? study_dir.find : study_dir, '*')).
      select{|f| File.directory? f }.sort.collect{|s| Study.setup(File.basename(s)) }
  end

  # Path of the study directory, using Study as its resource (memoized).
  def dir
    if @dir.nil?
      @dir = Path.setup(File.join(Study.study_dir, self))
      @dir.resource = Study
    end
    @dir
  end

  # Contents of the study's metadata.yaml, extended with IndiferentHash
  # (memoized).
  def metadata
    @metadata ||= (dir["metadata.yaml"].yaml.extend IndiferentHash)
  end

  # Users listed in the metadata, or an empty list (memoized).
  def users
    @users ||= metadata[:users] || []
  end

  #{{{ Attributes
  attr_writer :organism

  # Organism of the study; defaults to the "organism" metadata entry.
  def organism
    @organism ||= metadata["organism"]
  end

  # Location of the named entry under the study's `matrices` directory.
  def matrix_file(name)
    dir.matrices[name.to_s].find
  end

  # Names of the entries under the study's `matrices` directory.
  def matrices
    dir.matrices.glob('*').collect{|f| f.basename }
  end

  # Builds a Matrix for one of the study's matrices.
  #
  # type     - matrix name; must be one of `matrices`.
  # format   - identifier format handed to Matrix.new.
  # organism - organism code; defaults to the study metadata.
  #
  # Identifiers and samples come from the matrix's own files when present,
  # falling back to the organism identifiers and the study-wide samples file.
  #
  # Raises RuntimeError when the study has no matrices, when type is nil, or
  # when the requested matrix is not defined.
  def matrix(type, format = "Ensembl Gene ID", organism = nil)
    organism = self.metadata[:organism] if organism.nil?
    raise "No matrices defined for study #{ self }" if matrices.empty?
    raise "No type specified" if type.nil?
    type = type.to_s
    raise "No matrix #{ type } defined for study #{ self }" unless matrices.include? type
    data = dir.matrices[type].data.find if dir.matrices[type].data.exists?
    if dir.matrices[type].identifiers.exists?
      identifiers = dir.matrices[type].identifiers.find
    else
      identifiers = Organism.identifiers(organism).find
    end
    samples = dir.matrices[type].samples.find if dir.matrices[type].samples.exists?
    samples = dir.samples.find if samples.nil? and dir.samples.exists?
    Matrix.new(data, identifiers, samples, format, organism)
  end
end
|