rbbt-study 0.2.30 → 0.2.31

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,140 +0,0 @@
1
# Task that renders the gene mutation plot of the current study with ggplot2
# (through study.R) and returns the raw bytes of the generated PNG.
#
# Inputs:
#   cutoff - forwarded as the `cutoff` argument of rbbt.SE.plot.mutations.
#            NOTE(review): the help text "Pixels of image" looks copy-pasted
#            from :size -- confirm the intended meaning and reword it.
#   size   - used as both height and width in ggsave.
#            NOTE(review): ggsave sizes default to inches, not pixels -- confirm.
input :cutoff, :integer, "Pixels of image", 2
input :size, :integer, "Pixels of image", 14
task :gene_mutation_plot => :binary do |cutoff, size|
  png_file = file(study + ".png")

  # mkdir_p is a no-op when the directory already exists, so no existence
  # check is needed (the previous File.exists? guard breaks on Ruby >= 3.2,
  # where that alias was removed).
  FileUtils.mkdir_p File.dirname(png_file)

  study.R "
    library(ggplot2)
    library(plyr)
    library(reshape)

    layer.mutations = rbbt.SE.plot.mutations('#{study}', cutoff=#{cutoff});
    p <- ggplot() + layer.mutations

    p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

    ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "

  # Hand the image back as binary content, matching the :binary task type.
  Open.read(png_file, :mode => 'rb')
end
20
-
21
-
22
# Task that renders a Sample x Pathway tile plot for the current study and
# returns the raw bytes of the generated PNG.
#
# The embedded R script:
#   1. loads the per-sample mutated-gene matrix (rbbt.SE.sample.mutated.genes);
#   2. runs the :mutation_pathway_enrichment job for `database` through
#      rbbt.ruby.substitutions, keeping pathways with p-value < 0.2 whose name
#      does not match the disease/olfactory exclusion pattern;
#   3. marks, for each sample, which pathways contain at least one mutated gene
#      (tile alpha);
#   4. flags as "Exclusive" the sample/pathway tiles whose mutated genes were
#      not already attributed to an earlier pathway in level order (tile fill).
#
# Inputs:
#   database - pathway database code forwarded to the enrichment job.
#   size     - used as both height and width in ggsave.
#              NOTE(review): ggsave sizes default to inches, not pixels -- confirm.
input :database, :string, "Database code", :kegg
input :size, :integer, "Pixels of image", 14
task :pathway_mutation_plot => :binary do |database,size|
  png_file = file(study + ".png")

  # mkdir_p is a no-op when the directory already exists, so no existence
  # check is needed (the previous File.exists? guard breaks on Ruby >= 3.2,
  # where that alias was removed).
  FileUtils.mkdir_p File.dirname(png_file)

  study.R "
    library(ggplot2)
    library(plyr)
    library(reshape)


    study = '#{study}'
    # Sample mutations
    sample.mutated.genes = rbbt.SE.sample.mutated.genes(study)
    sample.mutated.genes$Sample = rownames(sample.mutated.genes)

    # Pathway enrichment
    pathway.enrichment = rbbt.ruby.substitutions(
    \"
    require 'rbbt/workflow'
    require 'rbbt/entity'
    require 'rbbt/entity/gene'
    require 'rbbt/sources/pfam'
    require 'rbbt/sources/kegg'
    require 'rbbt/sources/go'

    YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler

    Workflow.require_workflow 'StudyExplorer'

    study = Study.setup('STUDY')

    pathways = study.job(:mutation_pathway_enrichment, study, :baseline => :pathway_base_counts, :database => '#{database}', :fdr => false).run.select('p-value'){|pvalue| pvalue = pvalue.first.to_f if Array === pvalue; pvalue < 0.2}
    pathways.add_field 'Name' do |pathway, values|
      [pathway.name]
    end

    pathways.add_field 'Gene' do |pathway, values|
      values['Ensembl Gene ID'].name
    end

    pathways = pathways.select('Name'){|name| name.first.to_s !~ /cancer|olfactory|glioma|melanoma|malaria|leukemia|carcinoma|sarcoma/i}

    \", substitutions=list(STUDY=study));


    # Sample pathway mutations
    find.mutated.pathways.for.sample <- function(x, pathway.info){
      all.genes = names(x);
      genes = all.genes[x==TRUE];
      ddply(pathway.info, 'Name', function(x){pathway.genes = unlist(strsplit(x$Gene, '\\\\|')); if (length(intersect(genes, pathway.genes)) > 0){TRUE}else{FALSE}})
    }
    sample.pathway.mutations = ddply(sample.mutated.genes, 'Sample', find.mutated.pathways.for.sample, pathway.info = pathway.enrichment)
    names(sample.pathway.mutations) = c('Sample', 'Pathway', 'Mutated')

    p <- ggplot(sample.pathway.mutations) + geom_tile(aes(x=Sample, y=Pathway, alpha=Mutated))

    p <- rbbt.SE.plot.sort.by.pathway.mutations(p)


    # Mark repeated genes


    d = p$data
    d$Exclusive = FALSE

    pathway.genes = list();
    for(pathway in levels(d$Pathway)){
      pathway.genes[pathway] = strsplit(pathway.enrichment[pathway.enrichment[,'Name'] == pathway, 'Gene'], '\\\\|')
    }

    find.exclusive.pathway.genes <- function(data, pathways){
      found.genes = c();
      exclusive.pathway.genes = list();
      sample = as.character(unique(data$Sample));
      for(pathway in pathways){
        current.pathway.genes = pathway.genes[[pathway]];
        sample.genes = names(sample.mutated.genes)[sample.mutated.genes[sample,] == TRUE]
        sample.pathway.genes = intersect(current.pathway.genes, sample.genes);
        exclusive.genes = setdiff(sample.pathway.genes, found.genes);
        found.genes = c(found.genes, exclusive.genes)
        exclusive.pathway.genes[[pathway]] = exclusive.genes
      }

      return(exclusive.pathway.genes);
    }

    exclusive.pathway.genes = dlply(d, 'Sample', find.exclusive.pathway.genes, pathways = levels(d$Pathway))

    for( sample in names(exclusive.pathway.genes)){
      pathway.exclusive.genes = exclusive.pathway.genes[[sample]];
      for( pathway in names(pathway.exclusive.genes)){
        if (length(pathway.exclusive.genes[[pathway]]) > 0){
          print(sample)
          print(pathway)
          d[(d$Sample == sample & d$Pathway == pathway), 'Exclusive'] = TRUE
        }
      }
    }

    p$data = d


    p <- p + aes(fill=Exclusive)

    p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

    p




    ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
  "

  # Hand the image back as binary content, matching the :binary task type.
  Open.read(png_file, :mode => 'rb')
end
138
-
139
-
140
-
@@ -1,78 +0,0 @@
1
# Forward declaration: makes the Study constant available to the Sample
# module below; Study is reopened with its sample helpers further down.
module Study; end
3
-
4
# Entity for a sample, formatted as "Sample ID" and annotated with the study
# it belongs to.
module Sample
  extend Entity

  annotation :study

  self.format = ["Sample ID"]

  # Directory of the owning study.
  #
  # Returns nil when no study is known, or when setting the study up fails
  # (the failure is logged as a warning rather than raised).
  def dir
    owner = study
    return nil if owner.nil?
    return owner.dir if owner.respond_to? :dir

    begin
      Study.setup(owner).dir
    rescue => e
      Log.warn "Error accessing sample dir from study: #{e.message}"
      nil
    end
  end

  # Organism of the owning study, or nil when no study is known.
  def organism
    owner = study
    owner.nil? ? nil : owner.organism
  end

  # Study this sample belongs to.
  #
  # Reads the :study entry of the entity info; when it is missing the study is
  # looked up with Study.identify_study and stored through the annotation
  # setter. A non-nil result is memoized in @study.
  def study
    @study ||= begin
      annotated = info[:study]
      annotated.nil? ? (self.study = Study.identify_study(self)) : annotated
    end
  end

end
39
-
40
# Sample-related helpers for Study entities.
module Study

  # TSV with the sample information of the study (read from dir.samples),
  # with its entity options pointing back at this study.
  #
  # Returns nil when the samples file does not exist.
  def sample_info
    return nil unless dir.samples.exists?
    @sample_info ||= dir.samples.tsv.tap { |tsv| tsv.entity_options = { :study => self } }
  end

  # Samples of the study, set up as Sample entities annotated with this study.
  #
  # The identifiers come from the keys of sample_info when it is available and
  # from the job names of the cohort otherwise; the list is persisted through
  # local_persist and memoized in @samples.
  def samples
    @samples ||= begin
                   samples = local_persist("Sample", :array) do
                     if sample_info.nil?
                       self.cohort.collect { |g| g.jobname }
                     else
                       sample_info.keys
                     end
                   end
                   Sample.setup(samples, :study => self)
                   samples.study = self
                   samples
                 end
  end

  # Extends +list+ with every sample that shares a donor with one of its
  # members, using the first sample_info field matching /donor\s+id/i.
  #
  # Returns +list+ unchanged when the study has no sample information or no
  # donor id field.
  def match_samples(list)
    sample_info = self.sample_info

    # Without a samples file there is nothing to match against; previously
    # this path raised NoMethodError on nil.
    return list if sample_info.nil?

    donor_id_field = sample_info.fields.find { |f| f =~ /donor\s+id/i }
    if donor_id_field
      list_donors = sample_info.select(list).slice(donor_id_field).values.compact.flatten
      list_donor_samples = sample_info.select(list_donors).keys
      list = list_donor_samples.annotate((list + list_donor_samples).uniq)
    end

    list
  end

  # First study from Study.studies that contains any of the given samples
  # (a single sample or a possibly nested array), or nil when none does.
  # Every candidate study is set up with Study.setup along the way.
  def self.identify_study(samples)
    samples = Array === samples ? samples.flatten : [samples]

    studies = Study.studies.select { |study| Study.setup(study); (study.samples & samples).any? }

    studies.first
  end
end
@@ -1,87 +0,0 @@
1
- require 'rbbt/entity/snp'
2
-
3
- #require 'rbbt/entity/study/snp/samples'
4
-
5
# Empty declaration of the StudyWorkflow namespace; nothing is defined in it
# here.
module StudyWorkflow; end
7
-
8
# SNP file access for Study entities.
module Study
  # Whether the study directory contains SNP data (dir.snp exists).
  def has_snp?
    dir.snp.exists?
  end

  # Files found under dir.snp; the file names are used as sample names by
  # snp_cohort. Memoized in @snp_files.
  def snp_files
    @snp_files ||= dir.snp.find.glob("*")
  end

  # Hash mapping each sample (the basename of its SNP file, set up as a
  # Sample) to the sorted lines of that file (set up as SNP entities).
  #
  # The hash is memoized only once every file has been read. Previously the
  # memo was assigned before being filled, so an error while reading one file
  # left a partial cohort that later calls returned as if it were complete.
  def snp_cohort
    @snp_cohort ||= snp_files.each_with_object({}) do |f, cohort|
      sample = File.basename(f)
      Sample.setup(sample, self)
      snps = Open.read(f).split("\n").sort
      SNP.setup(snps)
      cohort[sample] = snps
    end
  end
end
31
-
32
# SNP index and SNP-based sample queries for Study entities.
module Study

  # Persisted index ("SNP2Samples") from SNP id to the samples carrying it.
  #
  # Every line of every SNP file is read as "<snp>" or "<snp>:<allele>"; the
  # file name is the sample. Each index value is the tab-separated list of
  # "<sample>" or "<sample>:<allele>" entries for that SNP. The result is set
  # up as a flat TSV keyed by "RS ID" with a single "Sample" field.
  def snp_index
    local_persist_tsv("SNP2Samples", "SNP2Samples", {}, :persist => true, :serializer => :clean) do |data|

      # Loaded lazily: only needed when the index actually has to be built.
      require 'progress-monitor'
      Progress.monitor "SNP files", :stack_depth => 0
      snp_files.each do |file|
        file = file.to_s
        sample = File.basename file
        File.open(file) do |f|
          while line = f.gets
            snp, allele = line.strip.split ":"

            # Blank lines (or lines without an id before the colon) used to
            # be stored under a nil/empty key; skip them instead.
            next if snp.nil? || snp.empty?

            entry = allele ? "#{sample}:#{allele}" : sample.dup
            previous = data[snp]
            data[snp] = previous.nil? ? entry : "#{previous}\t#{entry}"
          end
        end
      end

      TSV.setup data
      data.key_field = "RS ID"
      data.fields = ["Sample"]
      data.type = :flat
      data.serializer = :list
      data
    end
  end

  # Samples listed in the index for the SNP, whatever allele code they carry.
  property :samples_with_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).map { |entry| entry.split(":").first }, self)
  end

  # Samples whose index entry for the SNP carries the allele code "2".
  property :samples_with_homozygous_snp => :single2array do |snp|
    entries = (snp_index[snp] || []).map { |entry| entry.split(":") }
    Sample.setup(entries.select { |_sample, code| code == "2" }.map { |sample, _code| sample }, self)
  end

  # Samples whose index entry for the SNP carries the allele code "1".
  property :samples_with_heterozygous_snp => :single2array do |snp|
    entries = (snp_index[snp] || []).map { |entry| entry.split(":") }
    Sample.setup(entries.select { |_sample, code| code == "1" }.map { |sample, _code| sample }, self)
  end

end