rbbt-study 0.2.30 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 634c54bf8560389b8c5c49e996d968f37883b720
4
- data.tar.gz: c848a8dd97e570a123b913a747cb3bb63787538e
3
+ metadata.gz: ef1247f0c3941dd61e65fcb4a198e42967b30b95
4
+ data.tar.gz: 0008d2cf03e10e104a86631d387fb53fd825c639
5
5
  SHA512:
6
- metadata.gz: 005e523b4449ef10dc05bf470b60ad713990e47d904dababdbb5d0a682db3782df5f1f713565fb6d2281c745281521856d930bd283b4691ee89ffa8776b79345
7
- data.tar.gz: dd9ce8ac4736a08d5c4bee8786b843f3c0796bb8e8cb8ca81d4d0d08218888785cb49aaa76a221d4cfbb49379b766e6b5577417d8e1566c612e42536c26686db
6
+ metadata.gz: f7b4c891e0500ad8bb18dc098aa5830fd4d80d8d201722139b298567aaf7dcbb02c48d2e8f964bc85649d385fe587a40d49635ba3b83b46feb36606d3f10acc3
7
+ data.tar.gz: 65ca74cc1659633d89eec772c36d3860bbac75744033c891c5cc77c8d0ba048dd75436345d549b1824f22d5fc858350e65ec72b4cf1535c5c276c587f0f9cdfa
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-study
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.30
4
+ version: 0.2.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-14 00:00:00.000000000 Z
11
+ date: 2014-08-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem add the study entity with suport for NGS, Microarray and other
14
14
  types of data
@@ -21,28 +21,6 @@ extra_rdoc_files:
21
21
  files:
22
22
  - LICENSE.txt
23
23
  - README.rdoc
24
- - lib/rbbt/entity/study.rb
25
- - lib/rbbt/entity/study/cnv.rb
26
- - lib/rbbt/entity/study/cnv/genes.rb
27
- - lib/rbbt/entity/study/cnv/knowledge_base.rb
28
- - lib/rbbt/entity/study/cnv/samples.rb
29
- - lib/rbbt/entity/study/enrichment.rb
30
- - lib/rbbt/entity/study/expression.rb
31
- - lib/rbbt/entity/study/features.rb
32
- - lib/rbbt/entity/study/genes.rb
33
- - lib/rbbt/entity/study/genotypes.rb
34
- - lib/rbbt/entity/study/genotypes/enrichment.rb
35
- - lib/rbbt/entity/study/genotypes/genes.rb
36
- - lib/rbbt/entity/study/genotypes/knowledge_base.rb
37
- - lib/rbbt/entity/study/genotypes/mutations.rb
38
- - lib/rbbt/entity/study/genotypes/samples.rb
39
- - lib/rbbt/entity/study/knowledge_base.rb
40
- - lib/rbbt/entity/study/methylation.rb
41
- - lib/rbbt/entity/study/methylation/samples.rb
42
- - lib/rbbt/entity/study/mutations.rb
43
- - lib/rbbt/entity/study/plots.rb
44
- - lib/rbbt/entity/study/samples.rb
45
- - lib/rbbt/entity/study/snp.rb
46
24
  - share/R/data.R
47
25
  - share/R/plots.R
48
26
  homepage: http://github.com/mikisvaz/rbbt-study
@@ -1,172 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/misc'
3
-
4
- require 'rbbt/entity'
5
- require 'rbbt/resource'
6
- require 'rbbt/workflow'
7
-
8
- Workflow.require_workflow "Genomics"
9
-
10
- require 'rbbt/entity/study'
11
- require 'rbbt/entity/study/knowledge_base'
12
- require 'rbbt/entity/study/samples'
13
-
14
-
15
- module StudyWorkflow
16
- extend Workflow
17
-
18
- class << self
19
- attr_accessor :study
20
- end
21
-
22
- def self.workdir
23
- @workdir ||= Rbbt.var.jobs["Study"].find
24
- end
25
-
26
- helper :study do
27
- @study
28
- end
29
-
30
- helper :dir do
31
- study.dir
32
- end
33
-
34
- helper :organism do
35
- study.metadata[:organism]
36
- end
37
-
38
- def self.job(*args)
39
- super(*args).tap{|s| s.instance_variable_set("@study", @study) }
40
- end
41
- end
42
-
43
- module Study
44
- extend Entity
45
- extend Resource
46
- include LocalPersist
47
-
48
- class << self
49
- attr_accessor :study_dir
50
- def study_dir
51
- @study_dir ||= begin
52
- case
53
- when (not defined?(Rbbt))
54
- Path.setup(File.join(ENV["HOME"], '.studies'))
55
- when Rbbt.etc.study_dir.exists?
56
- Path.setup(Rbbt.etc.study_dir.read.chomp)
57
- else
58
- Rbbt.studies.find
59
- end
60
- end
61
- end
62
- end
63
-
64
- attr_accessor :workflow, :dir
65
-
66
- def job(task, *args)
67
- name, inputs = args
68
- if inputs.nil? and Hash === name
69
- inputs = name
70
- name = nil
71
- end
72
- name = self if name.nil? or name == :self or name == "self"
73
- step = workflow.job(task, name, {:organism => metadata[:organism], :watson => metadata[:watson]}.merge(inputs || {}))
74
- step.instance_variable_set(:@study, self)
75
- step
76
- end
77
-
78
- def workflow(&block)
79
- if block_given?
80
- @workflow.instance_eval &block
81
- else
82
- @workflow
83
- end
84
- end
85
-
86
- def self.annotation_repo
87
- @annotation_repo ||= Rbbt.var.cache.annotation_repo.find
88
- end
89
-
90
- def self.extended(base)
91
- base.workflow = StudyWorkflow.clone
92
- base.workflow.study = base
93
-
94
- if File.exists?(setup_file = File.join(base.dir, 'setup.rb'))
95
- base.instance_eval Open.read(setup_file), setup_file
96
- end
97
-
98
- base.local_persist_dir = Rbbt.var.cache.studies[base].persistence.find
99
-
100
- base
101
- end
102
-
103
- def self.studies
104
- Dir.glob(File.join(Path === study_dir ? study_dir.find : study_dir, '*')).
105
- select{|f| File.directory? f}.sort.collect{|s| Study.setup(File.basename(s))}
106
- end
107
-
108
- def self.studies
109
- case study_dir
110
- when nil
111
- []
112
- when Path
113
- study_dir.find_all.collect do |study_path|
114
- study_path.glob('*').select{|f| f.directory? }
115
- end.flatten.collect{|f| self.annotate f}
116
- else
117
- Dir.glob(File.join(study_dir, "*"))
118
- end.sort.collect do |dir|
119
- study = Study.setup(File.basename(dir))
120
- study.dir = study_dir.annotate(dir)
121
- study
122
- end
123
- end
124
-
125
- def dir
126
- @dir ||= if Path === Study.study_dir
127
- Study.study_dir[self]
128
- else
129
- Path.setup(File.join(Study.study_dir.dup, self), nil, Study)
130
- end
131
- @dir
132
- end
133
-
134
- def metadata
135
- @metadata ||= (dir["metadata.yaml"].yaml.extend IndiferentHash)
136
- end
137
-
138
- def users
139
- @users ||= metadata[:users] || []
140
- end
141
-
142
- #{{{ Attributes
143
- attr_accessor :organism
144
- def organism
145
- @organism ||= metadata["organism"]
146
- end
147
-
148
- def matrix_file(name)
149
- dir.matrices[name.to_s].produce.find
150
- end
151
-
152
- def matrices
153
- dir.matrices.glob('*').collect{|f| f.basename}
154
- end
155
-
156
- def matrix(type, format = "Ensembl Gene ID", organism = nil)
157
- organism = self.metadata[:organism] if organism.nil?
158
- raise "No matrices defined for study #{ self }" unless defined? matrices.empty?
159
- raise "No type specified" if type.nil?
160
- type = type.to_s
161
- raise "No matrix #{ type } defined for study #{ self }" unless matrices.include? type
162
- data = dir.matrices[type].data.find if dir.matrices[type].data.exists?
163
- if dir.matrices[type].identifiers.exists?
164
- identifiers = dir.matrices[type].identifiers.find
165
- else
166
- identifiers = Organism.identifiers(organism).find
167
- end
168
- samples = dir.matrices[type].samples.find if dir.matrices[type].samples.exists?
169
- samples = dir.samples.find if samples.nil? and dir.samples.exist?
170
- Matrix.new(data, samples, "count", format, organism, identifiers)
171
- end
172
- end
@@ -1,170 +0,0 @@
1
- require 'rbbt/entity/cnv'
2
-
3
- require 'rbbt/entity/study/cnv/genes'
4
- require 'rbbt/entity/study/cnv/samples'
5
-
6
- module StudyWorkflow
7
- helper :organism do
8
- study.metadata[:organism]
9
- end
10
-
11
- task :cnv_overview => :tsv do
12
- gene_overview = TSV.setup({},
13
- :key_field => "Ensembl Gene ID",
14
- :fields => ["Samples with gene lost", "Samples with gene gained"],
15
- :type => :double
16
- )
17
-
18
- cnv_samples = study.samples.select_by(:has_cnv?)
19
-
20
- log :samples, "Gathering affected samples"
21
- samples_gene_status = {}
22
- all_genes = []
23
- cnv_samples.each do |sample|
24
- samples_gene_status[sample] = {}
25
-
26
- lost_genes = sample.lost_genes
27
- lost_genes.clean_annotations.each do |gene|
28
- samples_gene_status[sample][gene] ||= [false, false]
29
- samples_gene_status[sample][gene][0] = true
30
- all_genes << gene
31
- end if lost_genes.any?
32
-
33
- gained_genes = sample.gained_genes
34
- gained_genes.clean_annotations.each do |gene|
35
- samples_gene_status[sample][gene] ||= [false, false]
36
- samples_gene_status[sample][gene][1] = true
37
- all_genes << gene
38
- end if gained_genes.any?
39
- end
40
-
41
- log :compiling, "Compiling result"
42
- all_genes.uniq.sort.each do |gene|
43
- gene_overview[gene] = []
44
- gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][0]}.collect{|sample, gene_status| sample}
45
- gene_overview[gene] << samples_gene_status.select{|sample, gene_status| gene_status.include? gene and gene_status[gene][1]}.collect{|sample, gene_status| sample}
46
- end
47
-
48
- gene_overview
49
- end
50
- end
51
-
52
- module Study
53
- def has_cnv?
54
- dir.cnv.exists?
55
- end
56
-
57
- def cnv_files
58
- dir.cnv.find.glob("*")
59
- end
60
-
61
- def cnv_cohort
62
- if @cnv_cohort.nil?
63
- @cnv_cohort = {}
64
- cnv_files.each do |f|
65
- sample = File.basename(f)
66
- Sample.setup(sample, self)
67
- cnvs = Open.read(f).split("\n").sort
68
- CNV.setup(cnvs, organism)
69
- @cnv_cohort[sample] = cnvs
70
- end
71
- end
72
- @cnv_cohort
73
- end
74
- end
75
-
76
- module Study
77
- property :recurrently_lost_genes => :single do |threshold|
78
- counts = {}
79
- self.samples.each do |sample|
80
- next unless sample.has_cnvs?
81
- puts sample
82
-
83
- genes = nil
84
- genes = sample.lost_genes.clean_annotations
85
- genes.each do |gene|
86
- counts[gene] ||= 0
87
- counts[gene] += 1
88
- end
89
- end
90
-
91
- recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
92
- Gene.setup(recurrent, "Ensembl Gene ID", organism)
93
- end
94
-
95
- property :recurrently_gained_genes => :single do |threshold|
96
- counts = {}
97
- self.samples.each do |sample|
98
- next unless sample.has_cnvs?
99
- puts sample
100
-
101
- genes = nil
102
- genes = sample.gained_genes.clean_annotations
103
- genes.each do |gene|
104
- counts[gene] ||= 0
105
- counts[gene] += 1
106
- end
107
- end
108
-
109
- recurrent = counts.select{|k,c| c >= threshold }.collect{|k,v| k }
110
- Gene.setup(recurrent, "Ensembl Gene ID", organism)
111
- end
112
-
113
- property :gene_sample_cnv_matrix => :single do
114
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
115
- samples = []
116
- i = 0
117
- num_samples = cohort.length
118
- cnv_cohort.each do |sample,cnvs|
119
- cnvs.genes.compact.flatten.uniq.each do |gene|
120
- tsv[gene] ||= ["FALSE"] * num_samples
121
- tsv[gene][i] = "TRUE"
122
- end
123
- samples << sample
124
- i += 1
125
- end
126
-
127
- tsv.fields = samples
128
-
129
- tsv
130
- end
131
-
132
- property :gene_sample_gain_matrix => :single do
133
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
134
- samples = []
135
- i = 0
136
- num_samples = cohort.length
137
- cnv_cohort.each do |sample,cnvs|
138
- cnvs.select_by(:gain?).genes.compact.flatten.uniq.each do |gene|
139
- tsv[gene] ||= ["FALSE"] * num_samples
140
- tsv[gene][i] = "TRUE"
141
- end
142
- samples << sample
143
- i += 1
144
- end
145
-
146
- tsv.fields = samples
147
-
148
- tsv
149
- end
150
-
151
-
152
- property :gene_sample_loss_matrix => :single do
153
- tsv = TSV.setup({}, :key_field => "Ensembl Gene ID", :namespace => organism, :type => :list)
154
- samples = []
155
- i = 0
156
- num_samples = cohort.length
157
- cnv_cohort.each do |sample,cnvs|
158
- cnvs.select_by(:loss?).genes.compact.flatten.uniq.each do |gene|
159
- tsv[gene] ||= ["FALSE"] * num_samples
160
- tsv[gene][i] = "TRUE"
161
- end
162
- samples << sample
163
- i += 1
164
- end
165
-
166
- tsv.fields = samples
167
-
168
- tsv
169
- end
170
- end
@@ -1,28 +0,0 @@
1
- module Study
2
-
3
- property :samples_with_gene_gained => :single do
4
- samples_with_gene_gained = {}
5
- self.job(:cnv_overview).run.through do |gene, values|
6
- values.fields.zip(values).each do |sample, value|
7
- next unless value == "Gained"
8
- samples_with_gene_gained[gene] ||= []
9
- samples_with_gene_gained[gene] << sample
10
- end
11
- end
12
- samples_with_gene_gained
13
- end
14
-
15
-
16
- property :samples_with_gene_lost => :single do
17
- samples_with_gene_lost = {}
18
- self.job(:cnv_overview).run.through do |gene, values|
19
- values.fields.zip(values).each do |sample, value|
20
- next unless value == "Lost"
21
- samples_with_gene_lost[gene] ||= []
22
- samples_with_gene_lost[gene] << sample
23
- end
24
- end
25
- samples_with_gene_lost
26
- end
27
-
28
- end
@@ -1,39 +0,0 @@
1
- require 'rbbt/knowledge_base'
2
- require 'rbbt/workflow'
3
- Workflow.require_workflow "Genomics"
4
- require 'rbbt/entity/study'
5
- require 'rbbt/entity/study/cnv'
6
- require 'rbbt/entity/gene'
7
- require 'rbbt/entity/genomic_mutation'
8
-
9
- module Study
10
-
11
- self.study_registry[:sample_cnv_genes] = Proc.new{|study,database|
12
- tsv = TSV.setup({}, :key_field => "Sample", :fields => ["Ensembl Gene ID", "CNV Variation"], :type => :double, :namespace => study.organism)
13
-
14
- all_cnvs = CNV.setup(study.cnv_cohort.values.flatten, study.organism)
15
- cnv2genes = Misc.process_to_hash(all_cnvs){|cnvs| cnvs.genes }
16
-
17
- study.cnv_cohort.each do |sample,cnvs|
18
- Log.info sample
19
- genes = []
20
- variations = []
21
- cnvs.variation.zip(cnv2genes.chunked_values_at(cnvs)).each_with_index do |p,i|
22
- variation, genes = p
23
- Annotated.purge(genes).each{|gene| genes << gene; variations << variation }
24
- end
25
- tsv[sample] = [genes.to_a, variations.to_a]
26
- end
27
-
28
- tsv
29
- }
30
-
31
- end
32
-
33
- if __FILE__ == $0
34
- Workflow.require_workflow "ICGC"
35
- Study.study_dir = ICGC.root
36
- s = Study.setup("Glioblastoma_Multiforme-TCGA-US")
37
- puts s.knowledge_base.get_database(:sample_cnv_genes).value_peek
38
-
39
- end