rbbt-study 0.2.30 → 0.2.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- metadata +2 -24
- data/lib/rbbt/entity/study.rb +0 -172
- data/lib/rbbt/entity/study/cnv.rb +0 -170
- data/lib/rbbt/entity/study/cnv/genes.rb +0 -28
- data/lib/rbbt/entity/study/cnv/knowledge_base.rb +0 -39
- data/lib/rbbt/entity/study/cnv/samples.rb +0 -54
- data/lib/rbbt/entity/study/enrichment.rb +0 -418
- data/lib/rbbt/entity/study/expression.rb +0 -24
- data/lib/rbbt/entity/study/features.rb +0 -17
- data/lib/rbbt/entity/study/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes.rb +0 -134
- data/lib/rbbt/entity/study/genotypes/enrichment.rb +0 -56
- data/lib/rbbt/entity/study/genotypes/genes.rb +0 -104
- data/lib/rbbt/entity/study/genotypes/knowledge_base.rb +0 -81
- data/lib/rbbt/entity/study/genotypes/mutations.rb +0 -34
- data/lib/rbbt/entity/study/genotypes/samples.rb +0 -28
- data/lib/rbbt/entity/study/knowledge_base.rb +0 -35
- data/lib/rbbt/entity/study/methylation.rb +0 -90
- data/lib/rbbt/entity/study/methylation/samples.rb +0 -31
- data/lib/rbbt/entity/study/mutations.rb +0 -259
- data/lib/rbbt/entity/study/plots.rb +0 -140
- data/lib/rbbt/entity/study/samples.rb +0 -78
- data/lib/rbbt/entity/study/snp.rb +0 -87
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef1247f0c3941dd61e65fcb4a198e42967b30b95
|
4
|
+
data.tar.gz: 0008d2cf03e10e104a86631d387fb53fd825c639
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7b4c891e0500ad8bb18dc098aa5830fd4d80d8d201722139b298567aaf7dcbb02c48d2e8f964bc85649d385fe587a40d49635ba3b83b46feb36606d3f10acc3
|
7
|
+
data.tar.gz: 65ca74cc1659633d89eec772c36d3860bbac75744033c891c5cc77c8d0ba048dd75436345d549b1824f22d5fc858350e65ec72b4cf1535c5c276c587f0f9cdfa
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-study
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem add the study entity with suport for NGS, Microarray and other
|
14
14
|
types of data
|
@@ -21,28 +21,6 @@ extra_rdoc_files:
|
|
21
21
|
files:
|
22
22
|
- LICENSE.txt
|
23
23
|
- README.rdoc
|
24
|
-
- lib/rbbt/entity/study.rb
|
25
|
-
- lib/rbbt/entity/study/cnv.rb
|
26
|
-
- lib/rbbt/entity/study/cnv/genes.rb
|
27
|
-
- lib/rbbt/entity/study/cnv/knowledge_base.rb
|
28
|
-
- lib/rbbt/entity/study/cnv/samples.rb
|
29
|
-
- lib/rbbt/entity/study/enrichment.rb
|
30
|
-
- lib/rbbt/entity/study/expression.rb
|
31
|
-
- lib/rbbt/entity/study/features.rb
|
32
|
-
- lib/rbbt/entity/study/genes.rb
|
33
|
-
- lib/rbbt/entity/study/genotypes.rb
|
34
|
-
- lib/rbbt/entity/study/genotypes/enrichment.rb
|
35
|
-
- lib/rbbt/entity/study/genotypes/genes.rb
|
36
|
-
- lib/rbbt/entity/study/genotypes/knowledge_base.rb
|
37
|
-
- lib/rbbt/entity/study/genotypes/mutations.rb
|
38
|
-
- lib/rbbt/entity/study/genotypes/samples.rb
|
39
|
-
- lib/rbbt/entity/study/knowledge_base.rb
|
40
|
-
- lib/rbbt/entity/study/methylation.rb
|
41
|
-
- lib/rbbt/entity/study/methylation/samples.rb
|
42
|
-
- lib/rbbt/entity/study/mutations.rb
|
43
|
-
- lib/rbbt/entity/study/plots.rb
|
44
|
-
- lib/rbbt/entity/study/samples.rb
|
45
|
-
- lib/rbbt/entity/study/snp.rb
|
46
24
|
- share/R/data.R
|
47
25
|
- share/R/plots.R
|
48
26
|
homepage: http://github.com/mikisvaz/rbbt-study
|
data/lib/rbbt/entity/study.rb
DELETED
@@ -1,172 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/misc'
|
3
|
-
|
4
|
-
require 'rbbt/entity'
|
5
|
-
require 'rbbt/resource'
|
6
|
-
require 'rbbt/workflow'
|
7
|
-
|
8
|
-
Workflow.require_workflow "Genomics"
|
9
|
-
|
10
|
-
require 'rbbt/entity/study'
|
11
|
-
require 'rbbt/entity/study/knowledge_base'
|
12
|
-
require 'rbbt/entity/study/samples'
|
13
|
-
|
14
|
-
|
15
|
-
module StudyWorkflow
|
16
|
-
extend Workflow
|
17
|
-
|
18
|
-
class << self
|
19
|
-
attr_accessor :study
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.workdir
|
23
|
-
@workdir ||= Rbbt.var.jobs["Study"].find
|
24
|
-
end
|
25
|
-
|
26
|
-
helper :study do
|
27
|
-
@study
|
28
|
-
end
|
29
|
-
|
30
|
-
helper :dir do
|
31
|
-
study.dir
|
32
|
-
end
|
33
|
-
|
34
|
-
helper :organism do
|
35
|
-
study.metadata[:organism]
|
36
|
-
end
|
37
|
-
|
38
|
-
def self.job(*args)
|
39
|
-
super(*args).tap{|s| s.instance_variable_set("@study", @study) }
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
module Study
|
44
|
-
extend Entity
|
45
|
-
extend Resource
|
46
|
-
include LocalPersist
|
47
|
-
|
48
|
-
class << self
|
49
|
-
attr_accessor :study_dir
|
50
|
-
def study_dir
|
51
|
-
@study_dir ||= begin
|
52
|
-
case
|
53
|
-
when (not defined?(Rbbt))
|
54
|
-
Path.setup(File.join(ENV["HOME"], '.studies'))
|
55
|
-
when Rbbt.etc.study_dir.exists?
|
56
|
-
Path.setup(Rbbt.etc.study_dir.read.chomp)
|
57
|
-
else
|
58
|
-
Rbbt.studies.find
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
attr_accessor :workflow, :dir
|
65
|
-
|
66
|
-
def job(task, *args)
|
67
|
-
name, inputs = args
|
68
|
-
if inputs.nil? and Hash === name
|
69
|
-
inputs = name
|
70
|
-
name = nil
|
71
|
-
end
|
72
|
-
name = self if name.nil? or name == :self or name == "self"
|
73
|
-
step = workflow.job(task, name, {:organism => metadata[:organism], :watson => metadata[:watson]}.merge(inputs || {}))
|
74
|
-
step.instance_variable_set(:@study, self)
|
75
|
-
step
|
76
|
-
end
|
77
|
-
|
78
|
-
def workflow(&block)
|
79
|
-
if block_given?
|
80
|
-
@workflow.instance_eval &block
|
81
|
-
else
|
82
|
-
@workflow
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.annotation_repo
|
87
|
-
@annotation_repo ||= Rbbt.var.cache.annotation_repo.find
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.extended(base)
|
91
|
-
base.workflow = StudyWorkflow.clone
|
92
|
-
base.workflow.study = base
|
93
|
-
|
94
|
-
if File.exists?(setup_file = File.join(base.dir, 'setup.rb'))
|
95
|
-
base.instance_eval Open.read(setup_file), setup_file
|
96
|
-
end
|
97
|
-
|
98
|
-
base.local_persist_dir = Rbbt.var.cache.studies[base].persistence.find
|
99
|
-
|
100
|
-
base
|
101
|
-
end
|
102
|
-
|
103
|
-
def self.studies
|
104
|
-
Dir.glob(File.join(Path === study_dir ? study_dir.find : study_dir, '*')).
|
105
|
-
select{|f| File.directory? f}.sort.collect{|s| Study.setup(File.basename(s))}
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.studies
|
109
|
-
case study_dir
|
110
|
-
when nil
|
111
|
-
[]
|
112
|
-
when Path
|
113
|
-
study_dir.find_all.collect do |study_path|
|
114
|
-
study_path.glob('*').select{|f| f.directory? }
|
115
|
-
end.flatten.collect{|f| self.annotate f}
|
116
|
-
else
|
117
|
-
Dir.glob(File.join(study_dir, "*"))
|
118
|
-
end.sort.collect do |dir|
|
119
|
-
study = Study.setup(File.basename(dir))
|
120
|
-
study.dir = study_dir.annotate(dir)
|
121
|
-
study
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def dir
|
126
|
-
@dir ||= if Path === Study.study_dir
|
127
|
-
Study.study_dir[self]
|
128
|
-
else
|
129
|
-
Path.setup(File.join(Study.study_dir.dup, self), nil, Study)
|
130
|
-
end
|
131
|
-
@dir
|
132
|
-
end
|
133
|
-
|
134
|
-
def metadata
|
135
|
-
@metadata ||= (dir["metadata.yaml"].yaml.extend IndiferentHash)
|
136
|
-
end
|
137
|
-
|
138
|
-
def users
|
139
|
-
@users ||= metadata[:users] || []
|
140
|
-
end
|
141
|
-
|
142
|
-
#{{{ Attributes
|
143
|
-
attr_accessor :organism
|
144
|
-
def organism
|
145
|
-
@organism ||= metadata["organism"]
|
146
|
-
end
|
147
|
-
|
148
|
-
def matrix_file(name)
|
149
|
-
dir.matrices[name.to_s].produce.find
|
150
|
-
end
|
151
|
-
|
152
|
-
def matrices
|
153
|
-
dir.matrices.glob('*').collect{|f| f.basename}
|
154
|
-
end
|
155
|
-
|
156
|
-
def matrix(type, format = "Ensembl Gene ID", organism = nil)
|
157
|
-
organism = self.metadata[:organism] if organism.nil?
|
158
|
-
raise "No matrices defined for study #{ self }" unless defined? matrices.empty?
|
159
|
-
raise "No type specified" if type.nil?
|
160
|
-
type = type.to_s
|
161
|
-
raise "No matrix #{ type } defined for study #{ self }" unless matrices.include? type
|
162
|
-
data = dir.matrices[type].data.find if dir.matrices[type].data.exists?
|
163
|
-
if dir.matrices[type].identifiers.exists?
|
164
|
-
identifiers = dir.matrices[type].identifiers.find
|
165
|
-
else
|
166
|
-
identifiers = Organism.identifiers(organism).find
|
167
|
-
end
|
168
|
-
samples = dir.matrices[type].samples.find if dir.matrices[type].samples.exists?
|
169
|
-
samples = dir.samples.find if samples.nil? and dir.samples.exist?
|
170
|
-
Matrix.new(data, samples, "count", format, organism, identifiers)
|
171
|
-
end
|
172
|
-
end
|
@@ -1,170 +0,0 @@
|
|
1
|
-
require 'rbbt/entity/cnv'
|
2
|
-
|
3
|
-
require 'rbbt/entity/study/cnv/genes'
|
4
|
-
require 'rbbt/entity/study/cnv/samples'
|
5
|
-
|
6
|
-
module StudyWorkflow
|
7
|
-
helper :organism do
|
8
|
-
study.metadata[:organism]
|
9
|
-
end
|
10
|
-
|
11
|
-
task :cnv_overview => :tsv do
|
12
|
-
gene_overview = TSV.setup({},
|
13
|
-
:key_field => "Ensembl Gene ID",
|
14
|
-
:fields => ["Samples with gene lost", "Samples with gene gained"],
|
15
|
-
:type => :double
|
16
|
-
)
|
17
|
-
|
18
|
-
cnv_samples = study.samples.select_by(:has_cnv?)
|
19
|
-
|
20
|
-
log :samples, "Gathering affected samples"
|
21
|
-
samples_gene_status = {}
|
22
|
-
all_genes = []
|
23
|
-
cnv_samples.each do |sample|
|
24
|
-
samples_gene_status[sample] = {}
|
25
|
-
|
26
|
-
lost_genes = sample.lost_genes
|
27
|
-
lost_genes.clean_annotations.each do |gene|
|
28
|
-
samples_gene_status[sample][gene] ||= [false, false]
|
29
|
-
samples_gene_status[sample][gene][0] = true
|
30
|
-
all_genes << gene
|
31
|
-
end if lost_genes.any?
|
32
|
-
|
33
|
-
gained_genes = sample.gained_genes
|
34
|
-
gained_genes.clean_annotations.each do |gene|
|
35
|
-
samples_gene_status[sample][gene] ||= [false, false]
|
36
|
-
samples_gene_status[sample][gene][1] = true
|
37
|
-
all_genes << gene
|
38
|
-
end if gained_genes.any?
|
39
|
-
end
|
40
|
-
|
41
|
-
log :compiling, "Compiling result"
|
42
|
-
all_genes.uniq.sort.each do |gene|
|
43
|
-
gene_overview[gene] = []
|
44
|
-
gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][0]}.collect{|sample, gene_status| sample}
|
45
|
-
gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][1]}.collect{|sample, gene_status| sample}
|
46
|
-
end
|
47
|
-
|
48
|
-
gene_overview
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
module Study
|
53
|
-
def has_cnv?
|
54
|
-
dir.cnv.exists?
|
55
|
-
end
|
56
|
-
|
57
|
-
def cnv_files
|
58
|
-
dir.cnv.find.glob("*")
|
59
|
-
end
|
60
|
-
|
61
|
-
def cnv_cohort
|
62
|
-
if @cnv_cohort.nil?
|
63
|
-
@cnv_cohort = {}
|
64
|
-
cnv_files.each do |f|
|
65
|
-
sample = File.basename(f)
|
66
|
-
Sample.setup(sample, self)
|
67
|
-
cnvs = Open.read(f).split("\n").sort
|
68
|
-
CNV.setup(cnvs, organism)
|
69
|
-
@cnv_cohort[sample] = cnvs
|
70
|
-
end
|
71
|
-
end
|
72
|
-
@cnv_cohort
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
module Study
|
77
|
-
property :recurrently_lost_genes => :single do |threshold|
|
78
|
-
counts = {}
|
79
|
-
self.samples.each do |sample|
|
80
|
-
next unless sample.has_cnvs?
|
81
|
-
puts sample
|
82
|
-
|
83
|
-
genes = nil
|
84
|
-
genes = sample.lost_genes.clean_annotations
|
85
|
-
genes.each do |gene|
|
86
|
-
counts[gene] ||= 0
|
87
|
-
counts[gene] += 1
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
|
92
|
-
Gene.setup(recurrent, "Ensembl Gene ID", organism)
|
93
|
-
end
|
94
|
-
|
95
|
-
property :recurrently_gained_genes => :single do |threshold|
|
96
|
-
counts = {}
|
97
|
-
self.samples.each do |sample|
|
98
|
-
next unless sample.has_cnvs?
|
99
|
-
puts sample
|
100
|
-
|
101
|
-
genes = nil
|
102
|
-
genes = sample.gained_genes.clean_annotations
|
103
|
-
genes.each do |gene|
|
104
|
-
counts[gene] ||= 0
|
105
|
-
counts[gene] += 1
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
|
110
|
-
Gene.setup(recurrent, "Ensembl Gene ID", organism)
|
111
|
-
end
|
112
|
-
|
113
|
-
property :gene_sample_cnv_matrix => :single do
|
114
|
-
tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
|
115
|
-
samples = []
|
116
|
-
i = 0
|
117
|
-
num_samples = cohort.length
|
118
|
-
cnv_cohort.each do |sample,cnvs|
|
119
|
-
cnvs.genes.compact.flatten.uniq.each do |gene|
|
120
|
-
tsv[gene] ||= ["FALSE"] * num_samples
|
121
|
-
tsv[gene][i] = "TRUE"
|
122
|
-
end
|
123
|
-
samples << sample
|
124
|
-
i += 1
|
125
|
-
end
|
126
|
-
|
127
|
-
tsv.fields = samples
|
128
|
-
|
129
|
-
tsv
|
130
|
-
end
|
131
|
-
|
132
|
-
property :gene_sample_gain_matrix => :single do
|
133
|
-
tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
|
134
|
-
samples = []
|
135
|
-
i = 0
|
136
|
-
num_samples = cohort.length
|
137
|
-
cnv_cohort.each do |sample,cnvs|
|
138
|
-
cnvs.select_by(:gain?).genes.compact.flatten.uniq.each do |gene|
|
139
|
-
tsv[gene] ||= ["FALSE"] * num_samples
|
140
|
-
tsv[gene][i] = "TRUE"
|
141
|
-
end
|
142
|
-
samples << sample
|
143
|
-
i += 1
|
144
|
-
end
|
145
|
-
|
146
|
-
tsv.fields = samples
|
147
|
-
|
148
|
-
tsv
|
149
|
-
end
|
150
|
-
|
151
|
-
|
152
|
-
property :gene_sample_loss_matrix => :single do
|
153
|
-
tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
|
154
|
-
samples = []
|
155
|
-
i = 0
|
156
|
-
num_samples = cohort.length
|
157
|
-
cnv_cohort.each do |sample,cnvs|
|
158
|
-
cnvs.select_by(:loss?).genes.compact.flatten.uniq.each do |gene|
|
159
|
-
tsv[gene] ||= ["FALSE"] * num_samples
|
160
|
-
tsv[gene][i] = "TRUE"
|
161
|
-
end
|
162
|
-
samples << sample
|
163
|
-
i += 1
|
164
|
-
end
|
165
|
-
|
166
|
-
tsv.fields = samples
|
167
|
-
|
168
|
-
tsv
|
169
|
-
end
|
170
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module Study
|
2
|
-
|
3
|
-
property :samples_with_gene_gained => :single do
|
4
|
-
samples_with_gene_gained = {}
|
5
|
-
self.job(:cnv_overview).run.through do |gene, values|
|
6
|
-
values.fields.zip(values).each do |sample, value|
|
7
|
-
next unless value == "Gained"
|
8
|
-
samples_with_gene_gained[gene] ||= []
|
9
|
-
samples_with_gene_gained[gene] << sample
|
10
|
-
end
|
11
|
-
end
|
12
|
-
samples_with_gene_gained
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
property :samples_with_gene_lost => :single do
|
17
|
-
samples_with_gene_lost = {}
|
18
|
-
self.job(:cnv_overview).run.through do |gene, values|
|
19
|
-
values.fields.zip(values).each do |sample, value|
|
20
|
-
next unless value == "Lost"
|
21
|
-
samples_with_gene_lost[gene] ||= []
|
22
|
-
samples_with_gene_lost[gene] << sample
|
23
|
-
end
|
24
|
-
end
|
25
|
-
samples_with_gene_lost
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'rbbt/knowledge_base'
|
2
|
-
require 'rbbt/workflow'
|
3
|
-
Workflow.require_workflow "Genomics"
|
4
|
-
require 'rbbt/entity/study'
|
5
|
-
require 'rbbt/entity/study/cnv'
|
6
|
-
require 'rbbt/entity/gene'
|
7
|
-
require 'rbbt/entity/genomic_mutation'
|
8
|
-
|
9
|
-
module Study
|
10
|
-
|
11
|
-
self.study_registry[:sample_cnv_genes] = Proc.new{|study,database|
|
12
|
-
tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "CNV Variation"], :type => :double, :namespace => study.organism)
|
13
|
-
|
14
|
-
all_cnvs = CNV.setup(study.cnv_cohort.values.flatten, study.organism)
|
15
|
-
cnv2genes = Misc.process_to_hash(all_cnvs){|cnvs| cnvs.genes }
|
16
|
-
|
17
|
-
study.cnv_cohort.each do |sample,cnvs|
|
18
|
-
Log.info sample
|
19
|
-
genes = []
|
20
|
-
variations = []
|
21
|
-
cnvs.variation.zip(cnv2genes.chunked_values_at(cnvs)).each_with_index do |p,i|
|
22
|
-
variation, genes = p
|
23
|
-
Annotated.purge(genes).each{|gene| genes << gene; variations << variation }
|
24
|
-
end
|
25
|
-
tsv[sample] = [genes.to_a, variations.to_a]
|
26
|
-
end
|
27
|
-
|
28
|
-
tsv
|
29
|
-
}
|
30
|
-
|
31
|
-
end
|
32
|
-
|
33
|
-
if __FILE__ == $0
|
34
|
-
Workflow.require_workflow "ICGC"
|
35
|
-
Study.study_dir = ICGC.root
|
36
|
-
s = Study.setup("Glioblastoma_Multiforme-TCGA-US")
|
37
|
-
puts s.knowledge_base.get_database(:sample_cnv_genes).value_peek
|
38
|
-
|
39
|
-
end
|