rbbt-study 0.2.30 → 0.2.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- metadata +2 -24
- data/lib/rbbt/entity/study.rb +0 -172
- data/lib/rbbt/entity/study/cnv.rb +0 -170
- data/lib/rbbt/entity/study/cnv/genes.rb +0 -28
- data/lib/rbbt/entity/study/cnv/knowledge_base.rb +0 -39
- data/lib/rbbt/entity/study/cnv/samples.rb +0 -54
- data/lib/rbbt/entity/study/enrichment.rb +0 -418
- data/lib/rbbt/entity/study/expression.rb +0 -24
- data/lib/rbbt/entity/study/features.rb +0 -17
- data/lib/rbbt/entity/study/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes.rb +0 -134
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +0 -56
- data/lib/rbbt/entity/study/genotypes/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +0 -81
- data/lib/rbbt/entity/study/genotypes/mutations.rb +0 -34
- data/lib/rbbt/entity/study/genotypes/samples.rb +0 -28
- data/lib/rbbt/entity/study/knowledge_base.rb +0 -35
- data/lib/rbbt/entity/study/methylation.rb +0 -90
- data/lib/rbbt/entity/study/methylation/samples.rb +0 -31
- data/lib/rbbt/entity/study/mutations.rb +0 -259
- data/lib/rbbt/entity/study/plots.rb +0 -140
- data/lib/rbbt/entity/study/samples.rb +0 -78
- data/lib/rbbt/entity/study/snp.rb +0 -87
@@ -1,140 +0,0 @@
|
|
1
|
-
# Task: draw the study's gene mutation plot with R/ggplot2 and return the
# resulting PNG.
#
# Inputs:
#   cutoff - interpolated into the R script as the `cutoff` argument of
#            rbbt.SE.plot.mutations.
#            NOTE(review): its description ("Pixels of image") looks copied
#            from :size -- confirm the intended wording.
#   size   - interpolated into the R script as both `height` and `width` of
#            ggsave.
#            NOTE(review): ggsave sizes are presumably not pixels by default;
#            confirm the unit before trusting the description.
#
# Returns the content of the generated PNG file, read in binary mode.
input :cutoff, :integer, "Pixels of image", 2
input :size, :integer, "Pixels of image", 14
task :gene_mutation_plot => :binary do |cutoff, size|
  png_file = file(study + ".png")

  # mkdir_p is a no-op when the directory already exists, so no prior
  # existence check is needed (File.exists? no longer exists in Ruby >= 3.2).
  FileUtils.mkdir_p File.dirname(png_file)

  study.R "
    library(ggplot2)
    library(plyr)
    library(reshape)

    layer.mutations = rbbt.SE.plot.mutations('#{study}', cutoff=#{cutoff});
    p <- ggplot() + layer.mutations

    p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

    ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "

  Open.read(png_file, :mode => 'rb')
end
|
20
|
-
|
21
|
-
|
22
|
-
# Task: draw a sample-by-pathway mutation tile plot with R/ggplot2 and return
# the resulting PNG.
#
# Inputs:
#   database - database code interpolated into the embedded Ruby snippet as the
#              :database option of the :mutation_pathway_enrichment job.
#   size     - interpolated into the R script as both `height` and `width` of
#              ggsave.
#              NOTE(review): described as "Pixels of image"; confirm the unit.
#
# The R script:
#   1. loads the per-sample mutated-gene matrix,
#   2. runs an embedded Ruby snippet (through rbbt.ruby.substitutions) that
#      keeps pathways with p-value < 0.2, adds 'Name' and 'Gene' fields and
#      drops pathways whose name matches a fixed list of disease terms,
#   3. marks, per sample, which pathways contain a mutated gene,
#   4. flags sample/pathway tiles as 'Exclusive' when the pathway has mutated
#      genes in that sample not already attributed to an earlier pathway,
#   5. saves the plot to the job's PNG file.
#
# Returns the content of the generated PNG file, read in binary mode.
input :database, :string, "Database code", :kegg
input :size, :integer, "Pixels of image", 14
task :pathway_mutation_plot => :binary do |database,size|
  png_file = file(study + ".png")

  # mkdir_p is a no-op when the directory already exists, so no prior
  # existence check is needed (File.exists? no longer exists in Ruby >= 3.2).
  FileUtils.mkdir_p File.dirname(png_file)

  study.R "
    library(ggplot2)
    library(plyr)
    library(reshape)


    study = '#{study}'
    # Sample mutations
    sample.mutated.genes = rbbt.SE.sample.mutated.genes(study)
    sample.mutated.genes$Sample = rownames(sample.mutated.genes)

    # Pathway enrichment
    pathway.enrichment = rbbt.ruby.substitutions(
    \"
    require 'rbbt/workflow'
    require 'rbbt/entity'
    require 'rbbt/entity/gene'
    require 'rbbt/sources/pfam'
    require 'rbbt/sources/kegg'
    require 'rbbt/sources/go'

    YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler

    Workflow.require_workflow 'StudyExplorer'

    study = Study.setup('STUDY')

    pathways = study.job(:mutation_pathway_enrichment, study, :baseline => :pathway_base_counts, :database => '#{database}', :fdr => false).run.select('p-value'){|pvalue| pvalue = pvalue.first.to_f if Array === pvalue; pvalue < 0.2}
    pathways.add_field 'Name' do |pathway, values|
      [pathway.name]
    end

    pathways.add_field 'Gene' do |pathway, values|
      values['Ensembl Gene ID'].name
    end

    pathways = pathways.select('Name'){|name| name.first.to_s !~ /cancer|olfactory|glioma|melanoma|malaria|leukemia|carcinoma|sarcoma/i}

    \", substitutions=list(STUDY=study));


    # Sample pathway mutations
    find.mutated.pathways.for.sample <- function(x, pathway.info){
      all.genes = names(x);
      genes = all.genes[x==TRUE];
      ddply(pathway.info, 'Name', function(x){pathway.genes = unlist(strsplit(x$Gene, '\\\\|')); if (length(intersect(genes, pathway.genes)) > 0){TRUE}else{FALSE}})
    }
    sample.pathway.mutations = ddply(sample.mutated.genes, 'Sample', find.mutated.pathways.for.sample, pathway.info = pathway.enrichment)
    names(sample.pathway.mutations) = c('Sample', 'Pathway', 'Mutated')

    p <- ggplot(sample.pathway.mutations) + geom_tile(aes(x=Sample, y=Pathway, alpha=Mutated))

    p <- rbbt.SE.plot.sort.by.pathway.mutations(p)


    # Mark repeated genes


    d = p$data
    d$Exclusive = FALSE

    pathway.genes = list();
    for(pathway in levels(d$Pathway)){
      pathway.genes[pathway] = strsplit(pathway.enrichment[pathway.enrichment[,'Name'] == pathway, 'Gene'], '\\\\|')
    }

    find.exclusive.pathway.genes <- function(data, pathways){
      found.genes = c();
      exclusive.pathway.genes = list();
      sample = as.character(unique(data$Sample));
      for(pathway in pathways){
        current.pathway.genes = pathway.genes[[pathway]];
        sample.genes = names(sample.mutated.genes)[sample.mutated.genes[sample,] == TRUE]
        sample.pathway.genes = intersect(current.pathway.genes, sample.genes);
        exclusive.genes = setdiff(sample.pathway.genes, found.genes);
        found.genes = c(found.genes, exclusive.genes)
        exclusive.pathway.genes[[pathway]] = exclusive.genes
      }

      return(exclusive.pathway.genes);
    }

    exclusive.pathway.genes = dlply(d, 'Sample', find.exclusive.pathway.genes, pathways = levels(d$Pathway))

    for( sample in names(exclusive.pathway.genes)){
      pathway.exclusive.genes = exclusive.pathway.genes[[sample]];
      for( pathway in names(pathway.exclusive.genes)){
        if (length(pathway.exclusive.genes[[pathway]]) > 0){
          print(sample)
          print(pathway)
          d[(d$Sample == sample & d$Pathway == pathway), 'Exclusive'] = TRUE
        }
      }
    }

    p$data = d


    p <- p + aes(fill=Exclusive)

    p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

    p




    ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "

  Open.read(png_file, :mode => 'rb')
end
|
138
|
-
|
139
|
-
|
140
|
-
|
@@ -1,78 +0,0 @@
|
|
1
|
-
# Declares the Study module up front, before Sample is defined; the module is
# reopened further down in this file with its sample-related methods.
module Study
end
|
3
|
-
|
4
|
-
# Entity module for sample identifiers (format "Sample ID"), annotated with
# the study each sample belongs to.
module Sample
  extend Entity

  annotation :study

  self.format = ["Sample ID"]

  # Directory of the owning study.
  #
  # Returns nil when the sample has no study. When the study object does not
  # itself respond to #dir it is first passed through Study.setup; any error
  # raised by that fallback is logged as a warning and nil is returned.
  def dir
    owner = study
    return nil if owner.nil?
    return owner.dir if owner.respond_to?(:dir)

    begin
      Study.setup(owner).dir
    rescue => error
      Log.warn "Error accessing sample dir from study: #{error.message}"
      nil
    end
  end

  # Organism reported by the owning study, or nil when there is no study.
  def organism
    owner = study
    owner.organism unless owner.nil?
  end

  # Study this sample belongs to (memoized once a non-nil value is found).
  #
  # Reads the :study entry of the entity info; when that is nil, asks
  # Study.identify_study for a study containing this sample and stores the
  # result through the annotation setter.
  def study
    @study ||= begin
      assigned = info[:study]
      if assigned.nil?
        assigned = Study.identify_study(self)
        self.study = assigned
      end
      assigned
    end
  end
end
|
39
|
-
|
40
|
-
# Sample-related helpers for Study entities.
module Study

  # Sample information table for this study.
  #
  # Returns nil when dir.samples does not exist. Otherwise loads dir.samples
  # as a TSV (memoized) whose entity_options are set to {:study => self}.
  def sample_info
    return nil unless dir.samples.exists?
    @sample_info ||= dir.samples.tsv.tap { |tsv| tsv.entity_options = { :study => self } }
  end

  # Samples of this study, set up as Sample entities annotated with the study
  # (memoized).
  #
  # The names are obtained through local_persist("Sample", :array): the keys
  # of sample_info when it is available, otherwise the jobname of every
  # element of the cohort.
  def samples
    @samples ||= begin
      names = local_persist("Sample", :array) do
        if sample_info.nil?
          self.cohort.collect { |g| g.jobname }
        else
          sample_info.keys
        end
      end
      Sample.setup(names, :study => self)
      names.study = self
      names
    end
  end

  # Extends +list+ with the samples that sample_info associates with the same
  # donors, when sample_info has a field matching /donor\s+id/i.
  #
  # list - sample identifiers.
  #
  # Returns +list+ itself when the study has no sample info or no donor id
  # field (previously a missing sample info raised NoMethodError on nil,
  # although #samples explicitly supports that situation). Otherwise returns
  # the de-duplicated union of +list+ and the donor-matched samples, carrying
  # the annotations of the donor-matched sample list.
  # NOTE(review): the exact matching done by TSV#select is not visible here --
  # presumably it selects rows by key or by field value; confirm.
  def match_samples(list)
    info = sample_info
    return list if info.nil?

    donor_id_field = info.fields.find { |f| f =~ /donor\s+id/i }
    return list if donor_id_field.nil?

    list_donors = info.select(list).slice(donor_id_field).values.compact.flatten
    list_donor_samples = info.select(list_donors).keys
    list_donor_samples.annotate((list + list_donor_samples).uniq)
  end

  # Finds the first study in Study.studies that shares a sample with +samples+.
  #
  # samples - a single sample or a (possibly nested) Array of samples.
  #
  # Returns the matching study after passing it to Study.setup, or nil when no
  # study matches. The search stops at the first match, so the samples of the
  # remaining studies are not loaded.
  def self.identify_study(samples)
    samples = Array === samples ? samples.flatten : [samples]

    Study.studies.find do |study|
      Study.setup(study)
      (study.samples & samples).any?
    end
  end
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'rbbt/entity/snp'
|
2
|
-
|
3
|
-
#require 'rbbt/entity/study/snp/samples'
|
4
|
-
|
5
|
-
# Empty declaration of the StudyWorkflow module; nothing in this file adds to
# it. NOTE(review): presumably only ensures the constant exists -- confirm.
module StudyWorkflow
end
|
7
|
-
|
8
|
-
# SNP file access for Study entities.
module Study
  # True when dir.snp exists for this study.
  def has_snp?
    dir.snp.exists?
  end

  # Every entry found under dir.snp (memoized).
  def snp_files
    @snp_files ||= dir.snp.find.glob("*")
  end

  # Hash mapping each sample to its sorted SNP list (memoized).
  #
  # The sample name is the basename of each SNP file and the SNPs are the
  # lines of that file. The sample is set up as a Sample of this study and the
  # list as SNP.
  #
  # The hash is only memoized once every file has been read: if reading a file
  # raises, nothing is cached and the next call starts over instead of
  # returning a partially filled cohort.
  def snp_cohort
    @snp_cohort ||= snp_files.each_with_object({}) do |file, cohort|
      sample = File.basename(file)
      Sample.setup(sample, self)
      snps = Open.read(file).split("\n").sort
      SNP.setup(snps)
      cohort[sample] = snps
    end
  end
end
|
31
|
-
|
32
|
-
# SNP-to-sample index and SNP-based sample lookups for Study entities.
module Study

  # Index from SNP id ("RS ID") to the samples whose SNP file lists it,
  # obtained through local_persist_tsv under the name "SNP2Samples".
  #
  # Each line of a SNP file is "<snp>" or "<snp>:<allele>"; the sample name is
  # the basename of the file. The value stored for a SNP is a tab-separated
  # string of "<sample>" or "<sample>:<allele>" entries. Lines that are blank
  # or have an empty SNP id are skipped; they used to be indexed under a nil
  # or empty key.
  def snp_index
    local_persist_tsv("SNP2Samples", "SNP2Samples", {}, :persist => true, :serializer => :clean) do |data|

      # Loaded only when the index actually has to be built.
      require 'progress-monitor'
      Progress.monitor "SNP files", :stack_depth => 0
      snp_files.each do |file|
        file = file.to_s
        sample = File.basename file
        File.open(file) do |f|
          f.each_line do |line|
            snp, allele = line.strip.split(":")
            next if snp.nil? || snp.empty?

            entry = [sample, allele].compact.join(":")
            previous = data[snp]
            data[snp] = previous.nil? ? entry : "#{previous}\t#{entry}"
          end
        end
      end

      # The values were written as raw tab-separated strings (:clean); the
      # serializer is switched to :list afterwards.
      # NOTE(review): presumably so that they are read back as lists -- confirm.
      TSV.setup data
      data.key_field = "RS ID"
      data.fields = ["Sample"]
      data.type = :flat
      data.serializer = :list
      data
    end
  end

  # Samples of this study listed for +snp+ in snp_index, whatever their allele
  # code; an unknown SNP gives an empty list.
  property :samples_with_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":").first}, self)
  end

  # Samples listed for +snp+ with allele code "2".
  property :samples_with_homozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":")}.select{|s,g| g == "2"}.collect{|s,g| s}, self)
  end

  # Samples listed for +snp+ with allele code "1".
  property :samples_with_heterozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":")}.select{|s,g| g == "1"}.collect{|s,g| s}, self)
  end



end
|