rbbt-entities 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -27,10 +27,10 @@ module Genotype
27
27
  end
28
28
  end
29
29
 
30
- def self.extended(base)
31
- prev_genotype_cohort_extended(base) if self.respond_to? :prev_genotype_cohort_extended
30
+ def self.extended(cohort)
31
+ prev_genotype_cohort_extended(cohort) if self.respond_to? :prev_genotype_cohort_extended
32
32
 
33
- class << base
33
+ class << cohort
34
34
  attr_accessor :metagenotype
35
35
 
36
36
  def jobname
@@ -43,24 +43,29 @@ module Genotype
43
43
 
44
44
  def metagenotype
45
45
  if @metagenotype.nil?
46
- @metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].watson)
46
+ @metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].orig_watson)
47
47
  @metagenotype.extend Genotype unless Genotype === @metagenotype
48
48
  end
49
49
  @metagenotype
50
50
  end
51
- end unless base.respond_to? :metagenotype
51
+ end unless cohort.respond_to? :metagenotype
52
52
 
53
- base.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
53
+ cohort.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
54
54
 
55
- base.helper :metagenotype do
56
- base.metagenotype
55
+ cohort.helper :metagenotype do
56
+ cohort.metagenotype
57
57
  end
58
58
 
59
- base.helper :samples do
60
- base
59
+ cohort.helper :samples do
60
+ cohort
61
61
  end
62
62
 
63
- NamedArray.setup(base, base.collect{|base| base.jobname})
63
+ NamedArray.setup(cohort, cohort.collect{|genotype| genotype.jobname})
64
+ end
65
+
66
+ def subset(genotypes)
67
+ new = self.values_at *(genotypes & fields)
68
+ new.extend Cohort
64
69
  end
65
70
 
66
71
  returns "Ensembl Gene ID"
@@ -70,12 +75,13 @@ module Genotype
70
75
  end
71
76
 
72
77
  returns "Ensembl Gene ID"
73
- task :damaged_genes => :array do
78
+ input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
79
+ input :threshold, :float, "from 0 to 1", 0.8
80
+ task :damaged_genes => :array do |methods, threshold|
74
81
  set_info :organism, metagenotype.organism
75
- samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
82
+ samples.collect{|genotype| genotype.damaged_genes(:methods => methods, :threshold => threshold)}.flatten.uniq
76
83
  end
77
84
 
78
-
79
85
  returns "Ensembl Gene ID"
80
86
  task :recurrent_genes => :array do
81
87
  set_info :organism, metagenotype.organism
@@ -86,8 +92,10 @@ module Genotype
86
92
 
87
93
  %w(damaged_genes recurrent_genes all_affected_genes).each do |name|
88
94
  define_method name do |*args|
95
+ options = args.first
89
96
  @cache ||= {}
90
- @cache[[name, args]] ||= self.job(name, self.jobname).run
97
+ key = [name, Misc.hash2md5(options || {})]
98
+ @cache[key] ||= self.job(name, self.jobname, options || {}).run
91
99
  end
92
100
  end
93
101
 
@@ -119,19 +127,27 @@ module Genotype
119
127
  end
120
128
 
121
129
  returns "Ensembl Gene ID"
122
- input :threshold, :float, "from 0 to 1", 0.5
123
- task :with_damaged_isoforms => :array do |threshold|
130
+ task :with_non_synonymous_mutations => :array do
131
+ set_info :organism, genotype.organism
132
+ genotype.mutated_isoforms.flatten.compact.reject{|mutated_isoform| ["SYNONYMOUS", "UTR"].include? mutated_isoform.consequence}.transcript.gene.uniq
133
+ end
134
+
135
+ returns "Ensembl Gene ID"
136
+ input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
137
+ input :threshold, :float, "from 0 to 1", 0.8
138
+ task :with_damaged_isoforms => :array do |methods,threshold|
124
139
  set_info :organism, genotype.organism
125
- mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
140
+ mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores(methods)}
126
141
  genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
127
142
  end
128
143
 
129
144
  returns "Ensembl Gene ID"
130
145
  task :truncated => :array do
131
146
  set_info :organism, genotype.organism
132
- MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").
133
- select{|isoform_mutation| isoform_mutation.truncated }.
134
- protein.gene.to("Ensembl Gene ID").uniq.clean_annotations
147
+ truncated_isoforms = MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").select{|isoform_mutation| isoform_mutation.truncated }
148
+ proteins = truncated_isoforms.protein
149
+ genes = proteins.gene
150
+ genes.to("Ensembl Gene ID").uniq.clean_annotations
135
151
  end
136
152
 
137
153
  returns "Ensembl Gene ID"
@@ -152,10 +168,12 @@ module Genotype
152
168
  (with_damaged_isoforms + truncated + affected_exon_junctions).uniq
153
169
  end
154
170
 
155
- %w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
171
+ %w(all_affected_genes damaged_genes truncated with_damaged_isoforms with_non_synonymous_mutations affected_exon_junctions long_genes recurrent_genes).each do |name|
156
172
  define_method name do |*args|
173
+ options = args.first
157
174
  @cache ||= {}
158
- @cache[[name, args]] ||= self.job(name, self.jobname).run
175
+ key = [name, Misc.hash2md5(options || {})]
176
+ @cache[key] ||= self.job(name, self.jobname, options || {}).run
159
177
  end
160
178
  end
161
179
  end
@@ -3,24 +3,3 @@ require 'rbbt/workflow'
3
3
  require 'rbbt/sources/go'
4
4
  require 'rbbt/sources/organism'
5
5
  require 'rbbt/entity/gene'
6
-
7
- module GOTerm
8
- extend Entity
9
- self.annotation :organism
10
-
11
- self.format = ["GO Term", "GO ID"]
12
-
13
- def name
14
- if Array === self
15
- self.collect{|id| GO.id2name(id)}
16
- else
17
- GO.id2name(self)
18
- end
19
- end
20
-
21
- def genes
22
- go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
23
- go2genes.unnamed = true
24
- Gene.setup(go2genes[self].first, "Ensembl Gene ID", organism)
25
- end
26
- end
@@ -4,6 +4,7 @@ require 'rbbt/sources/organism'
4
4
  require 'rbbt/mutation/mutation_assessor'
5
5
  require 'rbbt/mutation/sift'
6
6
  require 'rbbt/entity/protein'
7
+ require 'rbbt/sources/uniprot'
7
8
  require 'rbbt/entity/gene'
8
9
  require 'nokogiri'
9
10
 
@@ -13,31 +14,41 @@ module MutatedIsoform
13
14
 
14
15
  self.format = "Mutated Isoform"
15
16
 
16
- property :protein do
17
- if Array === self
18
- Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
19
- else
20
- Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
21
- end
17
+ property :protein => :array2single do
18
+ Protein.setup(self.collect{|mutation| mutation.split(":").first if mutation =~ /^ENSP/}, "Ensembl Protein ID", organism)
22
19
  end
20
+ persist :protein
23
21
 
24
- property :change => :single2array do
25
- self.split(":").last
22
+ property :transcript => :array2single do
23
+ begin
24
+ protein = self.protein
25
+ Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
26
+ end
26
27
  end
28
+ persist :transcript
27
29
 
28
- property :position => :single2array do
29
- if change.match(/[^\d](\d+)[^\d]/)
30
- $1.to_i
31
- else
32
- nil
33
- end
30
+ property :change => :array2single do
31
+ self.collect{|mi| mi.split(":").last}
32
+ end
33
+ persist :change
34
+
35
+ property :position => :array2single do
36
+ change.collect{|c|
37
+ if c.match(/[^\d](\d+)[^\d]/)
38
+ $1.to_i
39
+ else
40
+ nil
41
+ end
42
+ }
34
43
  end
35
-
44
+ persist :position
45
+
36
46
  property :ensembl_protein_image_url => :single2array do
37
47
  ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
38
48
  "http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
39
49
  end
40
-
50
+ persist :ensembl_protein_image_url
51
+
41
52
  property :marked_svg => :single2array do
42
53
  svg = Open.read(protein.ensembl_protein_image_url)
43
54
  seq_len = protein.sequence_length
@@ -56,10 +67,11 @@ module MutatedIsoform
56
67
  svg
57
68
  end
58
69
  end
70
+ persist :marked_svg
59
71
 
60
72
  ASTERISK = "*"[0]
61
73
  CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
62
- property :consecuence => :single2array do
74
+ property :consequence => :single2array do
63
75
  prot, change = self.split(":")
64
76
 
65
77
  case
@@ -79,14 +91,15 @@ module MutatedIsoform
79
91
  "MISS-SENSE"
80
92
  end
81
93
  end
94
+ persist :consequence
82
95
 
83
96
  property :truncated => :array2single do
84
- @truncated ||= begin
85
- protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
86
- self.collect do |isoform_mutation|
97
+ begin
98
+ protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
99
+ self.collect do |isoform_mutation|
87
100
 
88
- next if isoform_mutation.consecuence != "FRAMESHIFT" and isoform_mutation.consecuence != "NONSENSE"
89
- protein = isoform_mutation.protein
101
+ next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
102
+ protein = isoform_mutation.protein
90
103
  position = isoform_mutation.position
91
104
  sequence_length = protein2sequence_length[protein]
92
105
 
@@ -100,80 +113,124 @@ module MutatedIsoform
100
113
  end
101
114
  end
102
115
  end
103
-
104
116
  end
105
-
106
- property :damage_scores => :array2single do
107
- @damage_scores ||= begin
108
- sift_scores.zip(mutation_assessor_scores).collect{|p|
109
- p = p.compact
110
- if p.empty?
111
- nil
112
- else
113
- p.inject(0.0){|acc, e| acc += e} / p.length
114
- end
115
- }
116
- end
117
+ persist :truncated
118
+
119
+ property :damage_scores => :array2single do |*args|
120
+ begin
121
+ methods = args.first
122
+ methods = [:sift, :mutation_assessor] if methods.nil?
123
+ methods = [methods] unless Array === methods
124
+ values = methods.collect{|method|
125
+ case method.to_sym
126
+ when :sift
127
+ sift_scores
128
+ when :mutation_assessor
129
+ mutation_assessor_scores
130
+ else
131
+ raise "Unknown predictive method: #{ method }"
132
+ end
133
+ }
134
+ if values.compact.empty?
135
+ return [nil] * self.length
136
+ else
137
+ scores = values.shift
138
+ scores = scores.zip(*values)
139
+
140
+ scores.collect{|p|
141
+ p = p.compact
142
+ if p.empty?
143
+ nil
144
+ else
145
+ p.inject(0.0){|acc, e| acc += e} / p.length
146
+ end
147
+ }
148
+ end
149
+ end
150
+ end
151
+ persist :damage_scores
152
+
153
+ property :damaged? => :array2single do |*args|
154
+ begin
155
+ methods, threshold = args
156
+ threshold = 0.8 if threshold.nil?
157
+ damage_scores = self.damage_scores(methods)
158
+ truncated = self.truncated
159
+ damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
160
+ end
117
161
  end
162
+ persist :damaged?
118
163
 
119
164
  property :sift_scores => :array2single do
120
- @sift_scores ||= begin
121
- missense = self.select{|iso_mut| iso_mut.consecuence == "MISS-SENSE"}
165
+ begin
166
+ missense = self.select{|iso_mut| iso_mut.consequence == "MISS-SENSE"}
122
167
 
123
- values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
124
- v.nil? ? nil : v["Prediction"]
125
- }
168
+ values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
169
+ v.nil? ? nil : 1.0 - v["Score 1"].to_f
170
+ }
126
171
 
127
- range = {nil => nil,
128
- "" => nil,
129
- "TOLERATED" => 0,
130
- "*DAMAGING" => 1,
131
- "DAMAGING" => 1}
172
+ values
132
173
 
133
- range.values_at *values
134
- end
174
+ #range = {nil => nil,
175
+ # "" => nil,
176
+ # "TOLERATED" => 0,
177
+ # "*DAMAGING" => 1,
178
+ # "DAMAGING" => 1}
179
+
180
+ #range.values_at *values
181
+ end
135
182
  end
183
+ persist :sift_scores
136
184
 
137
185
  property :mutation_assessor_scores => :array2single do
138
- @mutation_assesor_scores ||= begin
139
- missense = self.select{|mutation| mutation.consecuence == "MISS-SENSE"}
140
-
141
- correspondance = {}
142
- list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
143
- prot, change = mutation.split(":")
144
- next if uniprot.nil?
145
- uniprot_change = [uniprot, change]
146
- correspondance[uniprot_change] ||= []
147
- correspondance[uniprot_change] << mutation
148
- uniprot_change
149
- end.compact
150
-
151
- #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
152
- return [nil] * self.length if list.empty?
153
-
154
- tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
155
-
156
- #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
157
- return [nil] * self.length if tsv.empty?
158
-
159
- new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
160
-
161
- tsv.each do |key, values|
162
- correspondance[key.split(" ")].each do |mutation|
163
- new[mutation] = values["Func. Impact"]
164
- end
165
- end
166
-
167
-
168
- range = {nil => nil,
169
- "" => nil,
170
- "neutral" => 0,
171
- "low" => 0.3,
172
- "medium" => 0.6,
173
- "high" => 1}
174
-
175
- range.values_at *new.values_at(*self)
176
- end
186
+ begin
187
+ missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
188
+
189
+ correspondance = {}
190
+ list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
191
+ prot, change = mutation.split(":")
192
+ next if uniprot.nil?
193
+ uniprot_change = [uniprot.upcase, change.upcase]
194
+ correspondance[uniprot_change] ||= []
195
+ correspondance[uniprot_change] << mutation
196
+ uniprot_change
197
+ end.compact
198
+
199
+ #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
200
+ return [nil] * self.length if list.empty?
201
+
202
+ tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
203
+
204
+ #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
205
+ return [nil] * self.length if tsv.empty?
206
+
207
+ new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
208
+
209
+ tsv.each do |key, values|
210
+ uniprot, change = key.split(" ")
211
+ uniprot_change = [uniprot.upcase, change.upcase]
212
+ correspondance[uniprot_change].each do |mutation|
213
+ new[mutation] = values["Func. Impact"]
214
+ end
215
+ end
216
+
217
+
218
+ range = {nil => nil,
219
+ "" => nil,
220
+ "neutral" => 0,
221
+ "low" => 0.5,
222
+ "medium" => 0.7,
223
+ "high" => 1.0}
224
+
225
+ range.values_at *new.values_at(*self)
226
+ end
177
227
  end
228
+ persist :mutation_assessor_scores
178
229
 
230
+ property :pdbs => :single do
231
+ uniprot = self.transcript.protein.uniprot
232
+ next if uniprot.nil?
233
+ Uniprot.pdbs_covering_aa_position(uniprot, self.position)
234
+ end
235
+ persist :pdbs
179
236
  end
@@ -6,14 +6,24 @@ module PMID
6
6
 
7
7
  self.format = "PMID"
8
8
 
9
+ property :article => :array2single do
10
+ PubMed.get_article(self).values_at(*self)
11
+ end
12
+ persist :article
13
+
9
14
  property :title => :array2single do
10
- @title ||= begin
11
- PubMed.get_article(self).values_at(*self).collect{|article| article.nil? ? nil : article.title}
12
- end
15
+ article.collect{|a| a.nil? ? nil : a.title}
16
+ end
17
+ persist :title
18
+
19
+ property :text => :array2single do
20
+ article.collect{|a| a.nil? ? nil : a.text}
13
21
  end
22
+ persist :text
14
23
 
15
24
  property :pubmed_url => :single2array do
16
25
  "<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
17
26
  end
27
+ persist :pubmed_url
18
28
  end
19
29