rbbt-study 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTk4NjAxNTJkMDAwOWY2ODExYzFlYzQzYzY2OGY5MWE1NGVhNWY4YQ==
5
+ data.tar.gz: !binary |-
6
+ OWJlZjM5NTU5YTAwOWU5OWMzMTc2NjZjZDhjZWQ5YTg4MTI0YzgyOA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZjM2MmJhOGVhMWI0YTcwOTFjOGRkNjA4ZjczNGU1YjZiNjVhOTA0YjY4ZDk1
10
+ ZTZjODAwNTBkMDI2MjMwZmM3YzljZDdmOGMzNmM3NTU3NzdmZDkxM2RmZDFj
11
+ NzgxYTk5ZGJmOTk3NGM2Mzk1ZmQ1ZmU3NDdkNWRjOGI1N2Q5YTU=
12
+ data.tar.gz: !binary |-
13
+ YmViM2IyNjk0YzgwNmQxYTU2Y2IwMTBiZGIyY2IxMjk1YWZjMzgwOThkMDE3
14
+ ZGJhYzgzNmQ2MDg2NDljNzQ2ZjA4NjMwYzFkYTdlNjM3YTYwNzVhZDQwMzgx
15
+ MTA2YjIyMzI1OTdiMzM4YzBjOTI5Yjc0MWRkMjdiNzg4MGUzZDg=
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Miguel Vazquez
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = rbbt-study
2
+
3
+ Description goes here.
4
+
5
+ == Contributing to rbbt-study
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2013 Miguel Vazquez. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,28 @@
1
# Study properties that index the :cnv_overview job result by gene.
module Study

  # Maps each gene to the samples in which it is gained.
  #
  # The :cnv_overview rows hold one list of samples per column, so the list is
  # picked by column name ("Samples with gene gained"). Comparing the cell
  # itself against "Gained" never matches a list of samples and left the
  # result permanently empty.
  #
  # Returns a Hash of gene => Array of samples; genes with no gained samples
  # are omitted.
  property :samples_with_gene_gained => :single do
    samples_with_gene_gained = {}
    self.job(:cnv_overview).run.through do |gene, values|
      values.fields.zip(values).each do |field, samples|
        next unless field == "Samples with gene gained"
        samples = Array(samples)
        next if samples.empty?
        (samples_with_gene_gained[gene] ||= []).concat(samples)
      end
    end
    samples_with_gene_gained
  end

  # Maps each gene to the samples in which it is lost.
  #
  # Same traversal as :samples_with_gene_gained, reading the
  # "Samples with gene lost" column instead.
  #
  # Returns a Hash of gene => Array of samples; genes with no lost samples
  # are omitted.
  property :samples_with_gene_lost => :single do
    samples_with_gene_lost = {}
    self.job(:cnv_overview).run.through do |gene, values|
      values.fields.zip(values).each do |field, samples|
        next unless field == "Samples with gene lost"
        samples = Array(samples)
        next if samples.empty?
        (samples_with_gene_lost[gene] ||= []).concat(samples)
      end
    end
    samples_with_gene_lost
  end

end
@@ -0,0 +1,55 @@
1
# CNV-related properties for the samples of a study.
module Sample
  # Returns the study's CNV cohort (study.cnv_cohort) for the sample list.
  property :cnvs => :array2single do
    study.cnv_cohort
  end

  # For each sample in the list, true when the study's CNV cohort has an
  # entry for it.
  property :has_cnv? => :array2single do
    study.cnv_cohort.values_at(*self).map { |sample_cnvs| !sample_cnvs.nil? }
  end

  # Maps each gene overlapped by one of the sample's CNVs to "Lost" or
  # "Gained", according to the CNV variation ("loss" / "gain"). CNVs with any
  # other variation are ignored. When a gene is covered by several CNVs the
  # last one visited wins.
  property :gene_CN => :single do
    gene_CN = {}
    cnvs.variation.zip(cnvs.genes).each do |var, genes|
      # The matrix properties of Study compact the same gene lists, which
      # suggests a CNV may come back without genes; skip it instead of
      # raising on nil.
      next if genes.nil?

      status = case var
               when "loss" then "Lost"
               when "gain" then "Gained"
               end
      next if status.nil?

      genes.clean_annotations.each { |gene| gene_CN[gene] = status }
    end
    gene_CN
  end
  persist :gene_CN

  # CNVs of the sample that are gains; nil when the sample has no CNV data,
  # an empty list when it has none recorded.
  property :gained_cnvs => :single do
    return nil if cnvs.nil?
    return [] if cnvs.empty?
    cnvs.select_by(:gain?)
  end

  # CNVs of the sample that are losses; nil when the sample has no CNV data,
  # an empty list when it has none recorded.
  property :lost_cnvs => :single do
    return nil if cnvs.nil?
    return [] if cnvs.empty?
    cnvs.select_by(:loss?)
  end

  # Genes marked "Gained" in gene_CN, set up as Ensembl Gene ID entities.
  property :gained_genes => :single do
    gained = gene_CN.select { |_gene, status| status == "Gained" }.collect { |gene, _status| gene }
    Gene.setup(gained, "Ensembl Gene ID", self.study.organism)
  end

  # Genes marked "Lost" in gene_CN, set up as Ensembl Gene ID entities.
  property :lost_genes => :single do
    lost = gene_CN.select { |_gene, status| status == "Lost" }.collect { |gene, _status| gene }
    Gene.setup(lost, "Ensembl Gene ID", self.study.organism)
  end

  # Union of lost and gained genes, without duplicates. Returns nil when
  # either list is nil. The organism is taken from the lost gene list.
  property :cnv_genes => :single do
    return nil if lost_genes.nil? or gained_genes.nil?
    Gene.setup((lost_genes + gained_genes).uniq, "Ensembl Gene ID", lost_genes.organism)
  end

end
54
+
55
+
@@ -0,0 +1,170 @@
1
+ require 'rbbt/entity/cnv'
2
+
3
+ require 'rbbt/entity/study/cnv/genes'
4
+ require 'rbbt/entity/study/cnv/samples'
5
+
6
# Workflow tasks summarising the CNV data of a study.
module StudyWorkflow
  # Organism code taken from the study metadata.
  helper :organism do
    study.metadata[:organism]
  end

  # Builds a :double TSV keyed by Ensembl Gene ID with two columns:
  # "Samples with gene lost" and "Samples with gene gained".
  #
  # Only samples selected by :has_cnv? are considered. Rows are inserted in
  # sorted gene order and the samples inside each cell keep the order of
  # study.samples.
  task :cnv_overview => :tsv do
    gene_overview = TSV.setup({},
      :key_field => "Ensembl Gene ID",
      :fields => ["Samples with gene lost", "Samples with gene gained"],
      :type => :double
    )

    cnv_samples = study.samples.select_by(:has_cnv?)

    log :samples, "Gathering affected samples"
    # gene => [samples with the gene lost, samples with the gene gained].
    # Accumulating per gene while walking the samples once avoids rescanning
    # every sample for every gene when the rows are compiled.
    samples_by_gene = {}
    cnv_samples.each do |sample|
      lost_genes = sample.lost_genes
      lost_genes.clean_annotations.uniq.each do |gene|
        (samples_by_gene[gene] ||= [[], []])[0] << sample
      end if lost_genes.any?

      gained_genes = sample.gained_genes
      gained_genes.clean_annotations.uniq.each do |gene|
        (samples_by_gene[gene] ||= [[], []])[1] << sample
      end if gained_genes.any?
    end

    log :compiling, "Compiling result"
    samples_by_gene.keys.sort.each do |gene|
      gene_overview[gene] = samples_by_gene[gene]
    end

    gene_overview
  end
end
51
+
52
# Access to the CNV files stored under the study directory.
module Study
  # Whether the study directory has a `cnv` entry.
  def has_cnv?
    dir.cnv.exists?
  end

  # Every entry found directly under the study's `cnv` directory.
  def cnv_files
    dir.cnv.find.glob("*")
  end

  # Hash of sample => CNV list, built on first use and memoized in
  # @cnv_cohort.
  #
  # Each file under the `cnv` directory is one sample: the file's basename is
  # the sample name and its lines, sorted, are the CNVs.
  def cnv_cohort
    return @cnv_cohort unless @cnv_cohort.nil?

    # Assigned before loading, so the memo is in place while files are read.
    @cnv_cohort = {}
    cnv_files.each do |path|
      sample_name = File.basename(path)
      Sample.setup(sample_name, self)

      sample_cnvs = Open.read(path).split("\n").sort
      CNV.setup(sample_cnvs, organism)

      @cnv_cohort[sample_name] = sample_cnvs
    end
    @cnv_cohort
  end
end
75
+
76
# Cohort-level CNV summaries: recurrently altered genes and gene x sample
# presence matrices.
module Study
  # Genes lost in at least `threshold` samples with CNV data, set up as
  # Ensembl Gene ID entities.
  property :recurrently_lost_genes => :single do |threshold|
    recurrent = recurrent_cnv_genes(threshold) { |sample| sample.lost_genes }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # Genes gained in at least `threshold` samples with CNV data, set up as
  # Ensembl Gene ID entities.
  property :recurrently_gained_genes => :single do |threshold|
    recurrent = recurrent_cnv_genes(threshold) { |sample| sample.gained_genes }
    Gene.setup(recurrent, "Ensembl Gene ID", organism)
  end

  # :list TSV keyed by Ensembl Gene ID with one column per sample of the CNV
  # cohort; "TRUE" when any CNV of the sample covers the gene, else "FALSE".
  property :gene_sample_cnv_matrix => :single do
    cnv_gene_sample_matrix { |cnvs| cnvs }
  end

  # Same layout as :gene_sample_cnv_matrix, restricted to gain CNVs.
  property :gene_sample_gain_matrix => :single do
    cnv_gene_sample_matrix { |cnvs| cnvs.select_by(:gain?) }
  end

  # Same layout as :gene_sample_cnv_matrix, restricted to loss CNVs.
  property :gene_sample_loss_matrix => :single do
    cnv_gene_sample_matrix { |cnvs| cnvs.select_by(:loss?) }
  end

  # Counts, over the samples that have CNV data, how many samples list each
  # gene, and returns the genes reaching `threshold`.
  #
  # The block receives a sample and returns its gene list. Samples are
  # filtered with `has_cnv?`, the Sample property also used by the
  # :cnv_overview task.
  def recurrent_cnv_genes(threshold)
    counts = {}
    self.samples.each do |sample|
      next unless sample.has_cnv?

      yield(sample).clean_annotations.each do |gene|
        counts[gene] ||= 0
        counts[gene] += 1
      end
    end

    counts.select { |_gene, count| count >= threshold }.collect { |gene, _count| gene }
  end

  # Builds the gene x sample "TRUE"/"FALSE" matrix shared by the three
  # matrix properties.
  #
  # The block receives the CNVs of one sample and returns the subset to
  # consider. Rows are sized from cnv_cohort, the same collection that
  # provides the columns, so every row has exactly one cell per field.
  def cnv_gene_sample_matrix
    tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)

    sample_cnvs = cnv_cohort
    num_samples = sample_cnvs.length
    sample_names = []

    sample_cnvs.each do |sample, cnvs|
      column = sample_names.length
      yield(cnvs).genes.compact.flatten.uniq.each do |gene|
        tsv[gene] ||= ["FALSE"] * num_samples
        tsv[gene][column] = "TRUE"
      end
      sample_names << sample
    end

    tsv.fields = sample_names

    tsv
  end

  private :recurrent_cnv_genes, :cnv_gene_sample_matrix
end