rbbt-study 0.2.30 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,140 +0,0 @@
1
# Task :gene_mutation_plot
#
# Runs an R script through `study.R` that builds a ggplot from
# rbbt.SE.plot.mutations(study, cutoff) and saves it with ggsave to a PNG
# named after the study. The task result is the content of that PNG, read in
# binary mode.
#
# Inputs:
#   cutoff - integer forwarded as `cutoff=` to rbbt.SE.plot.mutations
#   size   - integer used as both `height` and `width` in ggsave
#
# NOTE(review): the description of :cutoff ("Pixels of image") looks copied
# from :size; it is left untouched here because the exact meaning of the
# cutoff is defined by rbbt.SE.plot.mutations, which is not visible from this
# file. Also confirm the unit of :size -- ggsave is called without `units`.
input :cutoff, :integer, "Pixels of image", 2
input :size, :integer, "Pixels of image", 14
task :gene_mutation_plot => :binary do |cutoff, size|
    png_file = file(study + ".png")
    # mkdir_p does nothing when the directory already exists, so no existence
    # check is needed (the former File.exists? guard breaks on Ruby >= 3.2).
    FileUtils.mkdir_p File.dirname(png_file)
    study.R "
      library(ggplot2)
      library(plyr)
      library(reshape)

      layer.mutations = rbbt.SE.plot.mutations('#{study}', cutoff=#{cutoff});
      p <- ggplot() + layer.mutations

      p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

      ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
    "
    Open.read(png_file, :mode => 'rb')
end
20
-
21
-
22
# Task :pathway_mutation_plot
#
# Runs an R script through `study.R` that:
#   1. loads the per-sample mutated genes (rbbt.SE.sample.mutated.genes),
#   2. calls back into Ruby (rbbt.ruby.substitutions) to run the
#      :mutation_pathway_enrichment job for the chosen database, keeping
#      pathways with p-value < 0.2 whose name does not match a fixed list of
#      disease/olfactory terms,
#   3. builds a Sample x Pathway tile plot, marking as "Exclusive" the tiles
#      where a pathway contributes genes not already seen in an earlier
#      pathway for that sample,
#   4. saves the plot with ggsave to a PNG named after the study.
# The task result is the content of that PNG, read in binary mode.
#
# Inputs:
#   database - database code interpolated into the enrichment job options
#   size     - integer used as both `height` and `width` in ggsave
#
# NOTE(review): confirm the unit of :size -- ggsave is called without `units`.
input :database, :string, "Database code", :kegg
input :size, :integer, "Pixels of image", 14
task :pathway_mutation_plot => :binary do |database,size|
    png_file = file(study + ".png")
    # mkdir_p does nothing when the directory already exists, so no existence
    # check is needed (the former File.exists? guard breaks on Ruby >= 3.2).
    FileUtils.mkdir_p File.dirname(png_file)
    study.R "
      library(ggplot2)
      library(plyr)
      library(reshape)


      study = '#{study}'
      # Sample mutations
      sample.mutated.genes = rbbt.SE.sample.mutated.genes(study)
      sample.mutated.genes$Sample = rownames(sample.mutated.genes)

      # Pathway enrichment
      pathway.enrichment = rbbt.ruby.substitutions(
      \"
      require 'rbbt/workflow'
      require 'rbbt/entity'
      require 'rbbt/entity/gene'
      require 'rbbt/sources/pfam'
      require 'rbbt/sources/kegg'
      require 'rbbt/sources/go'

      YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler

      Workflow.require_workflow 'StudyExplorer'

      study = Study.setup('STUDY')

      pathways = study.job(:mutation_pathway_enrichment, study, :baseline => :pathway_base_counts, :database => '#{database}', :fdr => false).run.select('p-value'){|pvalue| pvalue = pvalue.first.to_f if Array === pvalue; pvalue < 0.2}
      pathways.add_field 'Name' do |pathway, values|
        [pathway.name]
      end

      pathways.add_field 'Gene' do |pathway, values|
        values['Ensembl Gene ID'].name
      end

      pathways = pathways.select('Name'){|name| name.first.to_s !~ /cancer|olfactory|glioma|melanoma|malaria|leukemia|carcinoma|sarcoma/i}

      \", substitutions=list(STUDY=study));


      # Sample pathway mutations
      find.mutated.pathways.for.sample <- function(x, pathway.info){
        all.genes = names(x);
        genes = all.genes[x==TRUE];
        ddply(pathway.info, 'Name', function(x){pathway.genes = unlist(strsplit(x$Gene, '\\\\|')); if (length(intersect(genes, pathway.genes)) > 0){TRUE}else{FALSE}})
      }
      sample.pathway.mutations = ddply(sample.mutated.genes, 'Sample', find.mutated.pathways.for.sample, pathway.info = pathway.enrichment)
      names(sample.pathway.mutations) = c('Sample', 'Pathway', 'Mutated')

      p <- ggplot(sample.pathway.mutations) + geom_tile(aes(x=Sample, y=Pathway, alpha=Mutated))

      p <- rbbt.SE.plot.sort.by.pathway.mutations(p)


      # Mark repeated genes


      d = p$data
      d$Exclusive = FALSE

      pathway.genes = list();
      for(pathway in levels(d$Pathway)){
        pathway.genes[pathway] = strsplit(pathway.enrichment[pathway.enrichment[,'Name'] == pathway, 'Gene'], '\\\\|')
      }

      find.exclusive.pathway.genes <- function(data, pathways){
        found.genes = c();
        exclusive.pathway.genes = list();
        sample = as.character(unique(data$Sample));
        for(pathway in pathways){
          current.pathway.genes = pathway.genes[[pathway]];
          sample.genes = names(sample.mutated.genes)[sample.mutated.genes[sample,] == TRUE]
          sample.pathway.genes = intersect(current.pathway.genes, sample.genes);
          exclusive.genes = setdiff(sample.pathway.genes, found.genes);
          found.genes = c(found.genes, exclusive.genes)
          exclusive.pathway.genes[[pathway]] = exclusive.genes
        }

        return(exclusive.pathway.genes);
      }

      exclusive.pathway.genes = dlply(d, 'Sample', find.exclusive.pathway.genes, pathways = levels(d$Pathway))

      for( sample in names(exclusive.pathway.genes)){
        pathway.exclusive.genes = exclusive.pathway.genes[[sample]];
        for( pathway in names(pathway.exclusive.genes)){
          if (length(pathway.exclusive.genes[[pathway]]) > 0){
            print(sample)
            print(pathway)
            d[(d$Sample == sample & d$Pathway == pathway), 'Exclusive'] = TRUE
          }
        }
      }

      p$data = d


      p <- p + aes(fill=Exclusive)

      p <- p + opts(axis.text.x=theme_text(angle=90), panel.background = theme_rect(fill='white', colour='steelblue'))

      p




      ggsave(p, filename='#{png_file}', height=#{size}, width=#{size});
    "
    Open.read(png_file, :mode => 'rb')
end
138
-
139
-
140
-
@@ -1,78 +0,0 @@
1
# Empty declaration of Study; its methods are added where the module is
# reopened.
module Study; end
3
-
4
# Entity for sample identifiers (format "Sample ID"), annotated with the
# study the sample belongs to.
module Sample
  extend Entity

  annotation :study

  self.format = ["Sample ID"]

  # Directory of the owning study.
  #
  # Returns nil when no study is known. When the study object does not
  # respond to :dir it is first set up with Study.setup; any error raised
  # while doing so is logged as a warning and nil is returned.
  def dir
    owner = study
    if owner.nil?
      nil
    elsif owner.respond_to? :dir
      owner.dir
    else
      begin
        Study.setup(owner).dir
      rescue
        Log.warn "Error accessing sample dir from study: #{$!.message}"
        nil
      end
    end
  end

  # Organism of the owning study, or nil when no study is known.
  def organism
    owner = study
    owner.nil? ? nil : owner.organism
  end

  # Study this sample belongs to (memoized once a truthy value is found).
  #
  # Uses the :study entry of `info` when present; otherwise asks
  # Study.identify_study and stores the answer through the annotation setter.
  def study
    return @study if @study

    resolved = info[:study]
    if resolved.nil?
      resolved = Study.identify_study(self)
      self.study = resolved
    end
    @study = resolved
  end

end
39
-
40
# Sample-related helpers for Study.
module Study

  # TSV read from `dir.samples`, with its entity options pointing back at
  # this study. Returns nil when `dir.samples` does not exist; the loaded TSV
  # is memoized.
  def sample_info
    return nil unless dir.samples.exists?
    @sample_info ||= dir.samples.tsv.tap{|tsv| tsv.entity_options = {:study => self }}
  end

  # Samples of this study, set up as Sample entities annotated with the study
  # (memoized).
  #
  # The identifiers are persisted under "Sample": the keys of `sample_info`
  # when it is available, otherwise the job names of the cohort.
  def samples
    @samples ||= begin
                   samples = local_persist("Sample", :array) do
                     if sample_info.nil?
                       self.cohort.collect{|g| g.jobname }
                     else
                       sample_info.keys
                     end
                   end
                   Sample.setup(samples, :study => self)
                   samples.study = self
                   samples
                 end
  end

  # Extends `list` with the other samples that share a donor with any sample
  # in it, using the first `sample_info` field whose name matches
  # /donor\s+id/i.
  #
  # Returns `list` unchanged when the study has no sample information or no
  # donor id field; otherwise returns the de-duplicated union of `list` and
  # the donor-matched samples.
  def match_samples(list)
    info = sample_info
    # sample_info is nil for studies without a samples file; previously this
    # raised NoMethodError on `nil.fields`.
    return list if info.nil?

    donor_id_field = info.fields.detect{|f| f =~ /donor\s+id/i}
    return list if donor_id_field.nil?

    list_donors = info.select(list).slice(donor_id_field).values.compact.flatten
    list_donor_samples = info.select(list_donors).keys
    list_donor_samples.annotate((list + list_donor_samples).uniq)
  end

  # First study in Study.studies that shares at least one sample with
  # `samples` (a single sample or an array, which is flattened), or nil when
  # none does.
  def self.identify_study(samples)
    samples = Array === samples ? samples.flatten : [samples]

    # detect stops at the first match, so later studies are not set up or
    # asked for their samples; the result is the same as select{...}.first.
    Study.studies.detect{|study| Study.setup(study); (study.samples & samples).any? }
  end
end
@@ -1,87 +0,0 @@
1
- require 'rbbt/entity/snp'
2
-
3
- #require 'rbbt/entity/study/snp/samples'
4
-
5
# Empty declaration of StudyWorkflow; nothing is defined on it here.
module StudyWorkflow; end
7
-
8
# SNP file access for Study.
module Study
  # Whether the study directory has an `snp` entry.
  def has_snp?
    snp_dir = dir.snp
    snp_dir.exists?
  end

  # Everything matching "*" under the located `snp` directory (memoized).
  def snp_files
    return @snp_files if @snp_files
    @snp_files = dir.snp.find.glob("*")
  end

  # Hash from sample name (the basename of each SNP file) to the sorted lines
  # of that file, set up as SNP entities. Built on first use and memoized.
  def snp_cohort
    return @snp_cohort unless @snp_cohort.nil?

    @snp_cohort = {}
    snp_files.each do |path|
      name = File.basename(path)
      Sample.setup(name, self)
      rs_ids = Open.read(path).split("\n").sort
      SNP.setup(rs_ids)
      @snp_cohort[name] = rs_ids
    end
    @snp_cohort
  end
end
31
-
32
# SNP index and SNP-based sample lookups for Study.
module Study

  # Persisted index ("SNP2Samples") from SNP id ("RS ID") to the samples that
  # carry it.
  #
  # Each SNP file is named after its sample and holds one entry per line,
  # either "snp" or "snp:allele". For every SNP the index accumulates
  # tab-separated entries of the form "sample" or "sample:allele".
  # Lines with no SNP id (blank lines, or lines starting with ":") are
  # skipped.
  def snp_index
    local_persist_tsv("SNP2Samples", "SNP2Samples", {}, :persist => true, :serializer => :clean) do |data|

      require 'progress-monitor'
      Progress.monitor "SNP files", :stack_depth => 0
      snp_files.each do |file|
        file = file.to_s
        sample = File.basename file
        File.open(file) do |f|
          while line = f.gets
            snp, allele = line.strip.split ":"
            # A blank line gives a nil snp and a leading ":" gives an empty
            # one; neither is a usable key for the index.
            next if snp.nil? || snp.empty?

            entry = allele ? "#{sample}:#{allele}" : sample.dup
            previous = data[snp]
            data[snp] = previous.nil? ? entry : previous + "\t" + entry
          end
        end
      end

      TSV.setup data
      data.key_field = "RS ID"
      data.fields = ["Sample"]
      data.type = :flat
      data.serializer = :list
      data
    end
  end

  # Samples that have an entry for `snp`, whatever follows the ":".
  property :samples_with_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":").first}, self)
  end

  # Samples whose entry for `snp` has "2" after the ":".
  property :samples_with_homozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":")}.select{|s,g| g == "2"}.collect{|s,g| s}, self)
  end

  # Samples whose entry for `snp` has "1" after the ":".
  property :samples_with_heterozygous_snp => :single2array do |snp|
    Sample.setup((snp_index[snp] || []).collect{|s| s.split(":")}.select{|s,g| g == "1"}.collect{|s,g| s}, self)
  end

end