rbbt-study 0.2.30 → 0.2.31

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 634c54bf8560389b8c5c49e996d968f37883b720
4
- data.tar.gz: c848a8dd97e570a123b913a747cb3bb63787538e
3
+ metadata.gz: ef1247f0c3941dd61e65fcb4a198e42967b30b95
4
+ data.tar.gz: 0008d2cf03e10e104a86631d387fb53fd825c639
5
5
  SHA512:
6
- metadata.gz: 005e523b4449ef10dc05bf470b60ad713990e47d904dababdbb5d0a682db3782df5f1f713565fb6d2281c745281521856d930bd283b4691ee89ffa8776b79345
7
- data.tar.gz: dd9ce8ac4736a08d5c4bee8786b843f3c0796bb8e8cb8ca81d4d0d08218888785cb49aaa76a221d4cfbb49379b766e6b5577417d8e1566c612e42536c26686db
6
+ metadata.gz: f7b4c891e0500ad8bb18dc098aa5830fd4d80d8d201722139b298567aaf7dcbb02c48d2e8f964bc85649d385fe587a40d49635ba3b83b46feb36606d3f10acc3
7
+ data.tar.gz: 65ca74cc1659633d89eec772c36d3860bbac75744033c891c5cc77c8d0ba048dd75436345d549b1824f22d5fc858350e65ec72b4cf1535c5c276c587f0f9cdfa
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-study
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.30
4
+ version: 0.2.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-14 00:00:00.000000000 Z
11
+ date: 2014-08-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem add the study entity with suport for NGS, Microarray and other
14
14
  types of data
@@ -21,28 +21,6 @@ extra_rdoc_files:
21
21
  files:
22
22
  - LICENSE.txt
23
23
  - README.rdoc
24
- - lib/rbbt/entity/study.rb
25
- - lib/rbbt/entity/study/cnv.rb
26
- - lib/rbbt/entity/study/cnv/genes.rb
27
- - lib/rbbt/entity/study/cnv/knowledge_base.rb
28
- - lib/rbbt/entity/study/cnv/samples.rb
29
- - lib/rbbt/entity/study/enrichment.rb
30
- - lib/rbbt/entity/study/expression.rb
31
- - lib/rbbt/entity/study/features.rb
32
- - lib/rbbt/entity/study/genes.rb
33
- - lib/rbbt/entity/study/genotypes.rb
34
- - lib/rbbt/entity/study/genotypes/enrichment.rb
35
- - lib/rbbt/entity/study/genotypes/genes.rb
36
- - lib/rbbt/entity/study/genotypes/knowledge_base.rb
37
- - lib/rbbt/entity/study/genotypes/mutations.rb
38
- - lib/rbbt/entity/study/genotypes/samples.rb
39
- - lib/rbbt/entity/study/knowledge_base.rb
40
- - lib/rbbt/entity/study/methylation.rb
41
- - lib/rbbt/entity/study/methylation/samples.rb
42
- - lib/rbbt/entity/study/mutations.rb
43
- - lib/rbbt/entity/study/plots.rb
44
- - lib/rbbt/entity/study/samples.rb
45
- - lib/rbbt/entity/study/snp.rb
46
24
  - share/R/data.R
47
25
  - share/R/plots.R
48
26
  homepage: http://github.com/mikisvaz/rbbt-study
@@ -1,172 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/misc'
3
-
4
- require 'rbbt/entity'
5
- require 'rbbt/resource'
6
- require 'rbbt/workflow'
7
-
8
- Workflow.require_workflow "Genomics"
9
-
10
- require 'rbbt/entity/study'
11
- require 'rbbt/entity/study/knowledge_base'
12
- require 'rbbt/entity/study/samples'
13
-
14
-
15
- module StudyWorkflow
16
- extend Workflow
17
-
18
- class << self
19
- attr_accessor :study
20
- end
21
-
22
- def self.workdir
23
- @workdir ||= Rbbt.var.jobs["Study"].find
24
- end
25
-
26
- helper :study do
27
- @study
28
- end
29
-
30
- helper :dir do
31
- study.dir
32
- end
33
-
34
- helper :organism do
35
- study.metadata[:organism]
36
- end
37
-
38
- def self.job(*args)
39
- super(*args).tap{|s| s.instance_variable_set("@study", @study) }
40
- end
41
- end
42
-
43
- module Study
44
- extend Entity
45
- extend Resource
46
- include LocalPersist
47
-
48
- class << self
49
- attr_accessor :study_dir
50
- def study_dir
51
- @study_dir ||= begin
52
- case
53
- when (not defined?(Rbbt))
54
- Path.setup(File.join(ENV["HOME"], '.studies'))
55
- when Rbbt.etc.study_dir.exists?
56
- Path.setup(Rbbt.etc.study_dir.read.chomp)
57
- else
58
- Rbbt.studies.find
59
- end
60
- end
61
- end
62
- end
63
-
64
- attr_accessor :workflow, :dir
65
-
66
- def job(task, *args)
67
- name, inputs = args
68
- if inputs.nil? and Hash === name
69
- inputs = name
70
- name = nil
71
- end
72
- name = self if name.nil? or name == :self or name == "self"
73
- step = workflow.job(task, name, {:organism => metadata[:organism], :watson => metadata[:watson]}.merge(inputs || {}))
74
- step.instance_variable_set(:@study, self)
75
- step
76
- end
77
-
78
- def workflow(&block)
79
- if block_given?
80
- @workflow.instance_eval &block
81
- else
82
- @workflow
83
- end
84
- end
85
-
86
- def self.annotation_repo
87
- @annotation_repo ||= Rbbt.var.cache.annotation_repo.find
88
- end
89
-
90
- def self.extended(base)
91
- base.workflow = StudyWorkflow.clone
92
- base.workflow.study = base
93
-
94
- if File.exists?(setup_file = File.join(base.dir, 'setup.rb'))
95
- base.instance_eval Open.read(setup_file), setup_file
96
- end
97
-
98
- base.local_persist_dir = Rbbt.var.cache.studies[base].persistence.find
99
-
100
- base
101
- end
102
-
103
- def self.studies
104
- Dir.glob(File.join(Path === study_dir ? study_dir.find : study_dir, '*')).
105
- select{|f| File.directory? f}.sort.collect{|s| Study.setup(File.basename(s))}
106
- end
107
-
108
- def self.studies
109
- case study_dir
110
- when nil
111
- []
112
- when Path
113
- study_dir.find_all.collect do |study_path|
114
- study_path.glob('*').select{|f| f.directory? }
115
- end.flatten.collect{|f| self.annotate f}
116
- else
117
- Dir.glob(File.join(study_dir, "*"))
118
- end.sort.collect do |dir|
119
- study = Study.setup(File.basename(dir))
120
- study.dir = study_dir.annotate(dir)
121
- study
122
- end
123
- end
124
-
125
- def dir
126
- @dir ||= if Path === Study.study_dir
127
- Study.study_dir[self]
128
- else
129
- Path.setup(File.join(Study.study_dir.dup, self), nil, Study)
130
- end
131
- @dir
132
- end
133
-
134
- def metadata
135
- @metadata ||= (dir["metadata.yaml"].yaml.extend IndiferentHash)
136
- end
137
-
138
- def users
139
- @users ||= metadata[:users] || []
140
- end
141
-
142
- #{{{ Attributes
143
- attr_accessor :organism
144
- def organism
145
- @organism ||= metadata["organism"]
146
- end
147
-
148
- def matrix_file(name)
149
- dir.matrices[name.to_s].produce.find
150
- end
151
-
152
- def matrices
153
- dir.matrices.glob('*').collect{|f| f.basename}
154
- end
155
-
156
- def matrix(type, format = "Ensembl Gene ID", organism = nil)
157
- organism = self.metadata[:organism] if organism.nil?
158
- raise "No matrices defined for study #{ self }" unless defined? matrices.empty?
159
- raise "No type specified" if type.nil?
160
- type = type.to_s
161
- raise "No matrix #{ type } defined for study #{ self }" unless matrices.include? type
162
- data = dir.matrices[type].data.find if dir.matrices[type].data.exists?
163
- if dir.matrices[type].identifiers.exists?
164
- identifiers = dir.matrices[type].identifiers.find
165
- else
166
- identifiers = Organism.identifiers(organism).find
167
- end
168
- samples = dir.matrices[type].samples.find if dir.matrices[type].samples.exists?
169
- samples = dir.samples.find if samples.nil? and dir.samples.exist?
170
- Matrix.new(data, samples, "count", format, organism, identifiers)
171
- end
172
- end
@@ -1,170 +0,0 @@
1
- require 'rbbt/entity/cnv'
2
-
3
- require 'rbbt/entity/study/cnv/genes'
4
- require 'rbbt/entity/study/cnv/samples'
5
-
6
- module StudyWorkflow
7
- helper :organism do
8
- study.metadata[:organism]
9
- end
10
-
11
- task :cnv_overview => :tsv do
12
- gene_overview = TSV.setup({},
13
- :key_field => "Ensembl Gene ID",
14
- :fields => ["Samples with gene lost", "Samples with gene gained"],
15
- :type => :double
16
- )
17
-
18
- cnv_samples = study.samples.select_by(:has_cnv?)
19
-
20
- log :samples, "Gathering affected samples"
21
- samples_gene_status = {}
22
- all_genes = []
23
- cnv_samples.each do |sample|
24
- samples_gene_status[sample] = {}
25
-
26
- lost_genes = sample.lost_genes
27
- lost_genes.clean_annotations.each do |gene|
28
- samples_gene_status[sample][gene] ||= [false, false]
29
- samples_gene_status[sample][gene][0] = true
30
- all_genes << gene
31
- end if lost_genes.any?
32
-
33
- gained_genes = sample.gained_genes
34
- gained_genes.clean_annotations.each do |gene|
35
- samples_gene_status[sample][gene] ||= [false, false]
36
- samples_gene_status[sample][gene][1] = true
37
- all_genes << gene
38
- end if gained_genes.any?
39
- end
40
-
41
- log :compiling, "Compiling result"
42
- all_genes.uniq.sort.each do |gene|
43
- gene_overview[gene] = []
44
- gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][0]}.collect{|sample, gene_status| sample}
45
- gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][1]}.collect{|sample, gene_status| sample}
46
- end
47
-
48
- gene_overview
49
- end
50
- end
51
-
52
- module Study
53
- def has_cnv?
54
- dir.cnv.exists?
55
- end
56
-
57
- def cnv_files
58
- dir.cnv.find.glob("*")
59
- end
60
-
61
- def cnv_cohort
62
- if @cnv_cohort.nil?
63
- @cnv_cohort = {}
64
- cnv_files.each do |f|
65
- sample = File.basename(f)
66
- Sample.setup(sample, self)
67
- cnvs = Open.read(f).split("\n").sort
68
- CNV.setup(cnvs, organism)
69
- @cnv_cohort[sample] = cnvs
70
- end
71
- end
72
- @cnv_cohort
73
- end
74
- end
75
-
76
- module Study
77
- property :recurrently_lost_genes => :single do |threshold|
78
- counts = {}
79
- self.samples.each do |sample|
80
- next unless sample.has_cnvs?
81
- puts sample
82
-
83
- genes = nil
84
- genes = sample.lost_genes.clean_annotations
85
- genes.each do |gene|
86
- counts[gene] ||= 0
87
- counts[gene] += 1
88
- end
89
- end
90
-
91
- recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
92
- Gene.setup(recurrent, "Ensembl Gene ID", organism)
93
- end
94
-
95
- property :recurrently_gained_genes => :single do |threshold|
96
- counts = {}
97
- self.samples.each do |sample|
98
- next unless sample.has_cnvs?
99
- puts sample
100
-
101
- genes = nil
102
- genes = sample.gained_genes.clean_annotations
103
- genes.each do |gene|
104
- counts[gene] ||= 0
105
- counts[gene] += 1
106
- end
107
- end
108
-
109
- recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
110
- Gene.setup(recurrent, "Ensembl Gene ID", organism)
111
- end
112
-
113
- property :gene_sample_cnv_matrix => :single do
114
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
115
- samples = []
116
- i = 0
117
- num_samples = cohort.length
118
- cnv_cohort.each do |sample,cnvs|
119
- cnvs.genes.compact.flatten.uniq.each do |gene|
120
- tsv[gene] ||= ["FALSE"] * num_samples
121
- tsv[gene][i] = "TRUE"
122
- end
123
- samples << sample
124
- i += 1
125
- end
126
-
127
- tsv.fields = samples
128
-
129
- tsv
130
- end
131
-
132
- property :gene_sample_gain_matrix => :single do
133
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
134
- samples = []
135
- i = 0
136
- num_samples = cohort.length
137
- cnv_cohort.each do |sample,cnvs|
138
- cnvs.select_by(:gain?).genes.compact.flatten.uniq.each do |gene|
139
- tsv[gene] ||= ["FALSE"] * num_samples
140
- tsv[gene][i] = "TRUE"
141
- end
142
- samples << sample
143
- i += 1
144
- end
145
-
146
- tsv.fields = samples
147
-
148
- tsv
149
- end
150
-
151
-
152
- property :gene_sample_loss_matrix => :single do
153
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
154
- samples = []
155
- i = 0
156
- num_samples = cohort.length
157
- cnv_cohort.each do |sample,cnvs|
158
- cnvs.select_by(:loss?).genes.compact.flatten.uniq.each do |gene|
159
- tsv[gene] ||= ["FALSE"] * num_samples
160
- tsv[gene][i] = "TRUE"
161
- end
162
- samples << sample
163
- i += 1
164
- end
165
-
166
- tsv.fields = samples
167
-
168
- tsv
169
- end
170
- end
@@ -1,28 +0,0 @@
1
- module Study
2
-
3
- property :samples_with_gene_gained => :single do
4
- samples_with_gene_gained = {}
5
- self.job(:cnv_overview).run.through do |gene, values|
6
- values.fields.zip(values).each do |sample, value|
7
- next unless value == "Gained"
8
- samples_with_gene_gained[gene] ||= []
9
- samples_with_gene_gained[gene] << sample
10
- end
11
- end
12
- samples_with_gene_gained
13
- end
14
-
15
-
16
- property :samples_with_gene_lost => :single do
17
- samples_with_gene_lost = {}
18
- self.job(:cnv_overview).run.through do |gene, values|
19
- values.fields.zip(values).each do |sample, value|
20
- next unless value == "Lost"
21
- samples_with_gene_lost[gene] ||= []
22
- samples_with_gene_lost[gene] << sample
23
- end
24
- end
25
- samples_with_gene_lost
26
- end
27
-
28
- end
@@ -1,39 +0,0 @@
1
- require 'rbbt/knowledge_base'
2
- require 'rbbt/workflow'
3
- Workflow.require_workflow "Genomics"
4
- require 'rbbt/entity/study'
5
- require 'rbbt/entity/study/cnv'
6
- require 'rbbt/entity/gene'
7
- require 'rbbt/entity/genomic_mutation'
8
-
9
- module Study
10
-
11
- self.study_registry[:sample_cnv_genes] = Proc.new{|study,database|
12
- tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "CNV Variation"], :type => :double, :namespace => study.organism)
13
-
14
- all_cnvs = CNV.setup(study.cnv_cohort.values.flatten, study.organism)
15
- cnv2genes = Misc.process_to_hash(all_cnvs){|cnvs| cnvs.genes }
16
-
17
- study.cnv_cohort.each do |sample,cnvs|
18
- Log.info sample
19
- genes = []
20
- variations = []
21
- cnvs.variation.zip(cnv2genes.chunked_values_at(cnvs)).each_with_index do |p,i|
22
- variation, genes = p
23
- Annotated.purge(genes).each{|gene| genes << gene; variations << variation }
24
- end
25
- tsv[sample] = [genes.to_a, variations.to_a]
26
- end
27
-
28
- tsv
29
- }
30
-
31
- end
32
-
33
- if __FILE__ == $0
34
- Workflow.require_workflow "ICGC"
35
- Study.study_dir = ICGC.root
36
- s = Study.setup("Glioblastoma_Multiforme-TCGA-US")
37
- puts s.knowledge_base.get_database(:sample_cnv_genes).value_peek
38
-
39
- end