rbbt-study 0.2.19 → 0.2.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ODg5YzNlNzAzMDA4NjUzNzNlYTc0MTIxMzI3MmZiNzI4N2VhZjg0Yw==
4
+ MmU0YmUxZDUxOTZhMmU1MjJlMmI3MWUwYzU4NzYxODY1NWZhNjk0NQ==
5
5
  data.tar.gz: !binary |-
6
- ZWNmM2VjZjJhY2FkMGYzM2JlOWMxYjBiYTEwNjA1YjVlYzM2N2Y0Yw==
6
+ NGI4ODVjZThmOTk5ODI2NTRhYzRlN2RlNDA0NTc0ZWIyNDk4NGYzOA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MmU0NjNlZGY0YzM3ZTBkYjQ1YWEyNjNkNzE0ODdkYTE4OWE1MzUxZjg4Y2Ji
10
- NzRjOGZmODI5MTUxMzJlYzMyOWZlOTA0MjBmNmQyZDM1MDhiMzA4MTc1ZDgx
11
- ZmJjZTYwMzM4ZTU3NzQzMzQ5ZWNiNTIwZTk1NjhjNzM0ZDYxMWQ=
9
+ MjI1Yzc5YmZjM2ZhY2NmNzI0Zjk3MTZhNTFlMzM0YWZkZWZkZjEwNDVkZGI0
10
+ NjVkM2Q4ZTY5OGQwZjkyMDRkYWM4Yzg0YmU3MWVjNzY4OGNjNWNiMDM2M2U4
11
+ YTQxYmZiMDQwM2VjNzI3MTNjOTgzNjdmNzg5ZTQ2NDM4OTJmYTM=
12
12
  data.tar.gz: !binary |-
13
- MzUwNzIzYWM0MDE1MjNhNTFjMGZkYTdjNzU5ZjVhNDcwZGNiY2NlZGUwYTBj
14
- ODY5ODZhMzMxMDUzOGFhM2M4ZDU4YzM2MzVhMTZkN2QyN2I1YzEwOTg1ZDJk
15
- ZDNiYmM0OTExOWZhMjJmOGYxMmRlMDJmZTYxYTMyMDRkMWMzOTA=
13
+ MTk5MjBlZTc1NDQ4OTZiNjE0M2VlZGVkYWE0YWZiM2VhYzJkNjk0MjhjYTI2
14
+ OTc2YzhiZTAxMTBmOWQwZGI0OGNiZjM5NmZiZjk3ODMxMjYwNzYzNDJmZGRj
15
+ ZTMyY2EzZjg1Y2I2MDkwNzM0OGRhOGJkYTllMmVmNTEwNzQzODY=
@@ -13,6 +13,34 @@ module StudyWorkflow
13
13
  study.metadata[:organism]
14
14
  end
15
15
 
16
# Task :binomial_significance, declared with a :tsv result.
# Builds a TSV keyed by "Ensembl Gene ID" with the fields "Matches", "Bases",
# "Frequency" and "p.value" (namespace: organism) and returns it.
# For each gene it stores: the match count, the gene's exon bases, their ratio,
# and the "p.value" entry of RSRuby's binom_test called with alternative 'greater'
# against the global frequency (distinct mutations / exon bases of all genes).
# Genes with an empty match list or with zero exon bases are skipped.
+ task :binomial_significance => :tsv do
17
+
18
+ tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :fields => ["Matches", "Bases", "Frequency", "p.value"], :namespace => organism)
19
+
20
# The keys of the :mutation_genes index are split on "~": the part before it is
# collected into all_mutations, the part after it into genes (both made unique).
+ matches = study.knowledge_base.get_index(:mutation_genes).keys
21
+ genes = matches.collect{|m| m.partition("~").last}.uniq
22
+ all_mutations = matches.collect{|m| m.partition("~").first}.uniq
23
+
24
+ total_bases = Gene.gene_list_exon_bases(genes)
25
+ global_frequency = all_mutations.length.to_f / total_bases
26
+
27
# Exon bases computed one gene at a time; presumably Misc.process_to_hash pairs
# each gene with the value the block yields for it -- TODO confirm.
+ gene2exon_size = Misc.process_to_hash(genes){|genes| genes.collect{|gene| Gene.gene_list_exon_bases([gene]) }}
28
+
29
+ genes.each do |gene|
30
+ mutations = study.knowledge_base.parents(:mutation_genes, gene).target
31
+ mutations = study.knowledge_base.subset(:sample_mutations, "Genomic Mutation" => mutations).source
32
+ next if mutations.empty?
33
# NOTE: 'matches' is reused here as a count (it held the index keys above): the
# number of entries in the .source of the :sample_mutations subset for this gene.
+ matches = mutations.length
34
+ exon_bases = gene2exon_size[gene]
35
+ next if exon_bases == 0
36
+ frequency = matches.to_f / exon_bases
37
+ pvalue = RSRuby.instance.binom_test(matches, exon_bases, global_frequency, 'greater')["p.value"]
38
+ tsv[gene] = [matches, exon_bases, frequency, pvalue]
39
+ end
40
+
41
+ tsv
42
+ end
43
+
16
44
  task :genotype_overview => :tsv do
17
45
  gene_overview = TSV.setup({},
18
46
  :key_field => "Ensembl Gene ID",
@@ -10,6 +10,7 @@ Workflow.require_workflow "Genomics"
10
10
  require 'rbbt/entity/study'
11
11
  require 'rbbt/entity/study/knowledge_base'
12
12
  require 'rbbt/entity/study/samples'
13
+
13
14
  require 'rbbt/expression/matrix'
14
15
 
15
16
  module StudyWorkflow
data/share/R/data.R ADDED
@@ -0,0 +1,76 @@
1
# rbbt.SE.sample.mutated.genes(study)
# Passes the embedded Ruby script to rbbt.ruby.substitutions together with
# substitutions = list(STUDY=study); presumably the STUDY placeholder in the
# script is replaced by `study` -- confirm against rbbt.ruby.substitutions.
# The script sets up the Study, takes the unique result of its :relevant_genes
# job, and fills a TSV keyed by 'Sample' whose fields are the relevant gene
# names: for each genotype in study.cohort the row (keyed by the genotype's
# jobname) holds 'TRUE' or 'FALSE' per relevant gene, depending on whether the
# gene is among the genotype's genes.
# The call's result is assigned as the last expression, so it is the function's
# (invisible) return value.
+ rbbt.SE.sample.mutated.genes <- function(study){
2
+ sample.mutated.genes <- rbbt.ruby.substitutions(
3
+ "
4
+ require 'rbbt/workflow'
5
+ Workflow.require_workflow 'StudyExplorer'
6
+
7
+ YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
8
+
9
+ Log.severity = 0
10
+
11
+ study = Study.setup('STUDY')
12
+
13
+ relevant_genes = study.job(:relevant_genes, study).run.uniq
14
+
15
+ tsv = TSV.setup({}, :key_field => 'Sample', :fields => relevant_genes.name, :type => :single)
16
+
17
+ study.cohort.each do |genotype|
18
+ sample = genotype.jobname
19
+ mutated_genes = genotype.genes.compact.flatten.uniq
20
+ tsv[sample] = relevant_genes.collect{|gene| mutated_genes.include?(gene)? 'TRUE' : 'FALSE' }
21
+ end
22
+
23
+ tsv
24
+ ", substitutions = list(STUDY=study))
25
+ }
26
+
27
# rbbt.SE.gene.kegg.pathway(genes)
# Converts `genes` with rbbt.a.to.string and hands the result to the embedded
# Ruby script as the GENE_STR substitution.
# The script sets the genes up as 'Associated Gene Name' entities for
# 'Hsa/jun2011', collects the unique KEGG pathways over all of them, and builds
# a TSV keyed by gene with a 1/0 entry per collected pathway (1 when the pathway
# is in that gene's own kegg_pathways list).
# Back in R, a Gene column is added from the row names of the result, which is
# then returned.
+ rbbt.SE.gene.kegg.pathway <- function(genes){
28
+ gene_str = rbbt.a.to.string(genes);
29
+ gene.pathways = rbbt.ruby.substitutions(
30
+ "
31
+ require 'rbbt/entity/gene'
32
+ require 'rbbt/sources/kegg'
33
+
34
+ YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
35
+
36
+ Log.severity=0
37
+
38
+ genes = [GENE_STR];
39
+
40
+ Gene.setup(genes, 'Associated Gene Name', 'Hsa/jun2011')
41
+
42
+ pathways = genes.kegg_pathways.compact.flatten.uniq
43
+
44
+ gene_pathways = {}
45
+ genes.each do |gene|
46
+ gene_pathway_list = gene.kegg_pathways || []
47
+ gene_pathways[gene] = pathways.collect{|p| gene_pathway_list.include?(p) ? 1 : 0 }
48
+ end
49
+
50
+ tsv = TSV.setup(gene_pathways, :key_field => 'Associated Gene Name', :fields => [pathways], :type => :flat)
51
+ ", substitutions = list(GENE_STR=gene_str));
52
+
53
+ gene.pathways$Gene = rownames(gene.pathways)
54
+
55
+ return(gene.pathways)
56
+ }
57
+
58
# rbbt.SE.study.samples(study)
# Runs an embedded Ruby script through rbbt.ruby.substitutions with
# substitutions = list(STUDY=study). The script requires the 'StudyExplorer'
# workflow, sets up the Study and ends with `study.samples`.
# Returns whatever rbbt.ruby.substitutions yields for that script.
+ rbbt.SE.study.samples <- function(study){
59
+ samples <- rbbt.ruby.substitutions(
60
+ "
61
+ require 'rbbt/workflow'
62
+ Workflow.require_workflow 'StudyExplorer'
63
+
64
+ YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
65
+
66
+ Log.severity = 0
67
+
68
+ study = Study.setup('STUDY')
69
+
70
+ study.samples
71
+ ", substitutions = list(STUDY=study));
72
+
73
+ return(samples);
74
+ }
75
+
76
+
data/share/R/plots.R ADDED
@@ -0,0 +1,122 @@
1
# rbbt.SE.plot.sort.by.field(plot, field)
# Reorders the factor levels in plot$data and returns `plot`.
#   plot  : object with a $data table that has Sample and Mutated columns plus
#           the column named by `field`.
#   field : name of the column to sort by.
# Uses ddply, which is not loaded in this function (presumably plyr is attached
# by the caller -- confirm).
+ rbbt.SE.plot.sort.by.field <- function(plot, field){
2
+ d = plot$data;
3
+
4
# Order the levels of `field` by the sum of Mutated over their rows.
+ d[[field]] = reorder(d[[field]], d$Mutated, sum)
5
+
6
# Per Sample: the smallest position, within the reversed levels of `field`, among
# the rows with Mutated == TRUE.
+ sample.best.gene.pos.df = ddply(d, "Sample", function(x){ min(match(subset(x, Mutated==TRUE)[[field]], rev(levels(d[[field]]))), na.rm=T)})
7
+
8
# Drop any previous score column before merging the new one in.
+ d$sample.best.gene.pos = NULL
9
+ names(sample.best.gene.pos.df) <- c("Sample", "sample.best.gene.pos");
10
+
11
+ d = merge(d, sample.best.gene.pos.df, all.x=TRUE)
12
+
13
+ d$Sample = reorder(d$Sample, d$sample.best.gene.pos)
14
+
15
+ plot$data = d;
16
+
17
+ return(plot);
18
+ }
19
+
20
# rbbt.SE.plot.sort.by.mutations(plot)
# Reorders the Gene and Sample factor levels in plot$data and returns `plot`.
# Gene levels are ordered by the sum of Mutated. Each Sample then gets a score
# of 1 / sum(2^(num.elems - p)), where p runs over the positions, within the
# reversed Gene levels, of the genes it has with Mutated == TRUE; Sample levels
# are reordered by that score.
# Uses ddply, which is not loaded in this function (presumably plyr is attached
# by the caller -- confirm).
+ rbbt.SE.plot.sort.by.mutations <- function(plot){
21
+ d = plot$data;
22
+
23
+ d$Gene = reorder(d$Gene, d$Mutated, sum);
24
+ num.elems = length(levels(d$Gene));
25
+
26
+ #sample.best.gene.pos.df = ddply(d, "Sample", function(x){ 1/mean(1/match(subset(x, Mutated==TRUE)$Gene, rev(levels(d$Gene)))^2)})
27
+ sample.best.gene.pos.df = ddply(d, "Sample", function(x){ 1/sum(2^(num.elems - match(subset(x, Mutated==TRUE)$Gene, rev(levels(d$Gene)))))})
28
+
29
# Drop any previous score column before merging the new one in.
+ d$sample.best.gene.pos = NULL
30
+ names(sample.best.gene.pos.df) <- c("Sample", "sample.best.gene.pos");
31
+
32
+ d = merge(d, sample.best.gene.pos.df, all.x=TRUE)
33
+
34
+ d$Sample = reorder(d$Sample, d$sample.best.gene.pos)
35
+
36
+ plot$data = d;
37
+
38
+ return(plot);
39
+ }
40
+
41
# rbbt.SE.plot.sort.by.pathway.mutations(plot)
# Same procedure as the gene-based sort, applied to the Pathway column:
# Pathway levels are ordered by the sum of Mutated, each Sample gets the score
# 1 / sum(2^(num.elems - p)) over the positions p (within the reversed Pathway
# levels) of its pathways with Mutated == TRUE, and Sample levels are reordered
# by that score. Returns `plot` with the updated $data.
# Uses ddply, which is not loaded in this function (presumably plyr is attached
# by the caller -- confirm).
+ rbbt.SE.plot.sort.by.pathway.mutations <- function(plot){
42
+ d = plot$data;
43
+
44
+ d$Pathway = reorder(d$Pathway, d$Mutated, sum);
45
+ num.elems = length(levels(d$Pathway));
46
+
47
+ #sample.best.gene.pos.df = ddply(d, "Sample", function(x){ min(match(subset(x, Mutated==TRUE)$Pathway, rev(levels(d$Pathway))), na.rm=T)})
48
+ #sample.best.gene.pos.df = ddply(d, "Sample", function(x){ 1/mean(1/match(subset(x, Mutated==TRUE)$Pathway, rev(levels(d$Pathway)))^2)})
49
+ sample.best.gene.pos.df = ddply(d, "Sample", function(x){ 1/sum(2^(num.elems - match(subset(x, Mutated==TRUE)$Pathway, rev(levels(d$Pathway)))))})
50
+
51
# Drop any previous score column before merging the new one in.
+ d$sample.best.gene.pos = NULL
52
+ names(sample.best.gene.pos.df) <- c("Sample", "sample.best.gene.pos");
53
+
54
+ d = merge(d, sample.best.gene.pos.df, all.x=TRUE)
55
+
56
+ d$Sample = reorder(d$Sample, d$sample.best.gene.pos)
57
+
58
+ plot$data = d;
59
+
60
+ return(plot);
61
+ }
62
+
63
# rbbt.SE.plot.mutations(study, cutoff = 3, sample.info = NULL)
# Builds and returns a geom_tile layer (x = Sample, y = Gene, alpha = Mutated)
# for the recurrently mutated genes of `study`.
#   study       : passed to rbbt.SE.sample.mutated.genes.
#   cutoff      : a gene is kept when its column has at least this many values
#                 equal to TRUE.
#   sample.info : optional table merged into the long-format data (all.x=TRUE).
# melt and geom_tile are not loaded here (presumably reshape and ggplot2 are
# attached by the caller -- confirm).
+ rbbt.SE.plot.mutations <- function(study, cutoff = 3, sample.info = NULL){
64
+ sample.mutated.genes = rbbt.SE.sample.mutated.genes(study);
65
+
66
+ gene.mutation.counts = apply(sample.mutated.genes, 2, function(x){sum(x==TRUE)})
67
+
68
+ recurrent.genes = names(gene.mutation.counts[gene.mutation.counts >= cutoff])
69
+
70
+ d.recurrent = sample.mutated.genes[, recurrent.genes]
71
+ d.recurrent$Sample = rownames(d.recurrent)
72
+
73
# Long format: one row per (Sample, Gene) pair.
+ d.recurrent.m = melt(d.recurrent, "Sample")
74
+
75
+ names(d.recurrent.m) <- c("Sample", "Gene", "Mutated")
76
+
77
+ if (is.null(sample.info)){
78
+ d = d.recurrent.m
79
+ }else{
80
+ d = merge(d.recurrent.m, sample.info, all.x=TRUE)
81
+ }
82
+
83
+ layer.mutations = geom_tile(data=d,aes(x=Sample, y=Gene, alpha=Mutated))
84
+
85
# NOTE(review): the value returned by the sort helper is discarded; the
# reordering only reaches layer.mutations if the layer object is modified by
# reference inside the helper -- confirm for the ggplot2 version in use.
+ rbbt.SE.plot.sort.by.mutations(layer.mutations);
86
+
87
+ return(layer.mutations);
88
+ }
89
+
90
# rbbt.SE.plot.add.expression(plot, study, ...)
# Merges per-gene expression values into plot$data and returns a geom_point
# layer (x = Sample, y = Gene) whose size is abs((Expression - Mean) / SD) and
# whose color is (Expression - Mean) / SD.
#   plot  : object whose $data has Gene and Sample columns.
#   study : passed, with `...`, to rbbt.SE.expression (not defined in the
#           visible files).
# Note that only the layer is returned, not `plot`.
+ rbbt.SE.plot.add.expression <- function(plot, study, ...){
91
+
92
+ genes = unique(plot$data$Gene);
93
+ gene.expression <- rbbt.SE.expression(study, genes, ...);
94
+
95
+ gene.expression.m <- melt(gene.expression);
96
+ names(gene.expression.m) <- c("Gene", "Sample", "Expression");
97
+
98
# Quartiles over all expression values; used below as clamping bounds.
+ gene.expression.3rd = summary(gene.expression.m$Expression)[["3rd Qu."]]
99
+ gene.expression.1st = summary(gene.expression.m$Expression)[["1st Qu."]]
100
+
101
# Round-trip Sample through as.numeric/as.character; entries that come back NA
# keep their original value. Assumes Sample is character rather than factor --
# TODO confirm.
+ unpadd = as.character(as.numeric(gene.expression.m$Sample));
102
+ unpadd[is.na(unpadd)] = gene.expression.m$Sample[is.na(unpadd)];
103
+ gene.expression.m$Sample = unpadd;
104
+
105
# Per-gene trimmed mean (trim=0.1), computed before the values are clamped.
+ mean.gene.expression <- aggregate(Expression ~ Gene, gene.expression.m, mean, trim=0.1, na.rm=T);
106
+ names(mean.gene.expression) <- c("Gene", "Mean");
107
# Clamp Expression to the [1st quartile, 3rd quartile] range.
+ gene.expression.m[gene.expression.m[,"Expression"] > gene.expression.3rd, "Expression"] = gene.expression.3rd
108
+ gene.expression.m[gene.expression.m[,"Expression"] < gene.expression.1st, "Expression"] = gene.expression.1st
109
+
110
# The column is named SD but holds mad() of the clamped values per gene.
+ sd.gene.expression <- aggregate(Expression ~ Gene, gene.expression.m, mad, na.rm=T);
111
+ names(sd.gene.expression) <- c("Gene", "SD");
112
+
113
+ gene.expression.m <- merge(gene.expression.m, mean.gene.expression);
114
+ gene.expression.m <- merge(gene.expression.m, sd.gene.expression);
115
+
116
+ #d = merge(d, gene.expression.m, by=c("Sample", "Gene"), all.x=TRUE);
117
+ plot$data = merge(plot$data, gene.expression.m, all.x=TRUE);
118
+
119
+ layer.expression = geom_point(data=plot$data, aes(x=Sample, y=Gene, size=abs((Expression - Mean) / SD), color=((Expression - Mean) / SD)));
120
+
121
+ return(layer.expression)
122
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-study
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.19
4
+ version: 0.2.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-05 00:00:00.000000000 Z
11
+ date: 2013-12-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem add the study entity with suport for NGS, Microarray and other
14
14
  types of data
@@ -43,6 +43,8 @@ files:
43
43
  - lib/rbbt/entity/study/plots.rb
44
44
  - lib/rbbt/entity/study/samples.rb
45
45
  - lib/rbbt/entity/study/snp.rb
46
+ - share/R/data.R
47
+ - share/R/plots.R
46
48
  homepage: http://github.com/mikisvaz/rbbt-study
47
49
  licenses:
48
50
  - MIT