rbbt-entities 1.0.0 → 1.1.0

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -27,10 +27,10 @@ module Genotype
27
27
  end
28
28
  end
29
29
 
30
- def self.extended(base)
31
- prev_genotype_cohort_extended(base) if self.respond_to? :prev_genotype_cohort_extended
30
+ def self.extended(cohort)
31
+ prev_genotype_cohort_extended(cohort) if self.respond_to? :prev_genotype_cohort_extended
32
32
 
33
- class << base
33
+ class << cohort
34
34
  attr_accessor :metagenotype
35
35
 
36
36
  def jobname
@@ -43,24 +43,29 @@ module Genotype
43
43
 
44
44
  def metagenotype
45
45
  if @metagenotype.nil?
46
- @metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].watson)
46
+ @metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].orig_watson)
47
47
  @metagenotype.extend Genotype unless Genotype === @metagenotype
48
48
  end
49
49
  @metagenotype
50
50
  end
51
- end unless base.respond_to? :metagenotype
51
+ end unless cohort.respond_to? :metagenotype
52
52
 
53
- base.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
53
+ cohort.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
54
54
 
55
- base.helper :metagenotype do
56
- base.metagenotype
55
+ cohort.helper :metagenotype do
56
+ cohort.metagenotype
57
57
  end
58
58
 
59
- base.helper :samples do
60
- base
59
+ cohort.helper :samples do
60
+ cohort
61
61
  end
62
62
 
63
- NamedArray.setup(base, base.collect{|base| base.jobname})
63
+ NamedArray.setup(cohort, cohort.collect{|genotype| genotype.jobname})
64
+ end
65
+
66
+ def subset(genotypes)
67
+ new = self.values_at *(genotypes & fields)
68
+ new.extend Cohort
64
69
  end
65
70
 
66
71
  returns "Ensembl Gene ID"
@@ -70,12 +75,13 @@ module Genotype
70
75
  end
71
76
 
72
77
  returns "Ensembl Gene ID"
73
- task :damaged_genes => :array do
78
+ input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
79
+ input :threshold, :float, "from 0 to 1", 0.8
80
+ task :damaged_genes => :array do |methods, threshold|
74
81
  set_info :organism, metagenotype.organism
75
- samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
82
+ samples.collect{|genotype| genotype.damaged_genes(:methods => methods, :threshold => threshold)}.flatten.uniq
76
83
  end
77
84
 
78
-
79
85
  returns "Ensembl Gene ID"
80
86
  task :recurrent_genes => :array do
81
87
  set_info :organism, metagenotype.organism
@@ -86,8 +92,10 @@ module Genotype
86
92
 
87
93
  %w(damaged_genes recurrent_genes all_affected_genes).each do |name|
88
94
  define_method name do |*args|
95
+ options = args.first
89
96
  @cache ||= {}
90
- @cache[[name, args]] ||= self.job(name, self.jobname).run
97
+ key = [name, Misc.hash2md5(options || {})]
98
+ @cache[key] ||= self.job(name, self.jobname, options || {}).run
91
99
  end
92
100
  end
93
101
 
@@ -119,19 +127,27 @@ module Genotype
119
127
  end
120
128
 
121
129
  returns "Ensembl Gene ID"
122
- input :threshold, :float, "from 0 to 1", 0.5
123
- task :with_damaged_isoforms => :array do |threshold|
130
+ task :with_non_synonymous_mutations => :array do
131
+ set_info :organism, genotype.organism
132
+ genotype.mutated_isoforms.flatten.compact.reject{|mutated_isoform| ["SYNONYMOUS", "UTR"].include? mutated_isoform.consequence}.transcript.gene.uniq
133
+ end
134
+
135
+ returns "Ensembl Gene ID"
136
+ input :methods, :array, "Predictive methods", [:sift, :mutation_assessor]
137
+ input :threshold, :float, "from 0 to 1", 0.8
138
+ task :with_damaged_isoforms => :array do |methods,threshold|
124
139
  set_info :organism, genotype.organism
125
- mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
140
+ mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores(methods)}
126
141
  genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
127
142
  end
128
143
 
129
144
  returns "Ensembl Gene ID"
130
145
  task :truncated => :array do
131
146
  set_info :organism, genotype.organism
132
- MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").
133
- select{|isoform_mutation| isoform_mutation.truncated }.
134
- protein.gene.to("Ensembl Gene ID").uniq.clean_annotations
147
+ truncated_isoforms = MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, "Hsa/jun2011").select{|isoform_mutation| isoform_mutation.truncated }
148
+ proteins = truncated_isoforms.protein
149
+ genes = proteins.gene
150
+ genes.to("Ensembl Gene ID").uniq.clean_annotations
135
151
  end
136
152
 
137
153
  returns "Ensembl Gene ID"
@@ -152,10 +168,12 @@ module Genotype
152
168
  (with_damaged_isoforms + truncated + affected_exon_junctions).uniq
153
169
  end
154
170
 
155
- %w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
171
+ %w(all_affected_genes damaged_genes truncated with_damaged_isoforms with_non_synonymous_mutations affected_exon_junctions long_genes recurrent_genes).each do |name|
156
172
  define_method name do |*args|
173
+ options = args.first
157
174
  @cache ||= {}
158
- @cache[[name, args]] ||= self.job(name, self.jobname).run
175
+ key = [name, Misc.hash2md5(options || {})]
176
+ @cache[key] ||= self.job(name, self.jobname, options || {}).run
159
177
  end
160
178
  end
161
179
  end
@@ -3,24 +3,3 @@ require 'rbbt/workflow'
3
3
  require 'rbbt/sources/go'
4
4
  require 'rbbt/sources/organism'
5
5
  require 'rbbt/entity/gene'
6
-
7
- module GOTerm
8
- extend Entity
9
- self.annotation :organism
10
-
11
- self.format = ["GO Term", "GO ID"]
12
-
13
- def name
14
- if Array === self
15
- self.collect{|id| GO.id2name(id)}
16
- else
17
- GO.id2name(self)
18
- end
19
- end
20
-
21
- def genes
22
- go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
23
- go2genes.unnamed = true
24
- Gene.setup(go2genes[self].first, "Ensembl Gene ID", organism)
25
- end
26
- end
@@ -4,6 +4,7 @@ require 'rbbt/sources/organism'
4
4
  require 'rbbt/mutation/mutation_assessor'
5
5
  require 'rbbt/mutation/sift'
6
6
  require 'rbbt/entity/protein'
7
+ require 'rbbt/sources/uniprot'
7
8
  require 'rbbt/entity/gene'
8
9
  require 'nokogiri'
9
10
 
@@ -13,31 +14,41 @@ module MutatedIsoform
13
14
 
14
15
  self.format = "Mutated Isoform"
15
16
 
16
- property :protein do
17
- if Array === self
18
- Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
19
- else
20
- Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
21
- end
17
+ property :protein => :array2single do
18
+ Protein.setup(self.collect{|mutation| mutation.split(":").first if mutation =~ /^ENSP/}, "Ensembl Protein ID", organism)
22
19
  end
20
+ persist :protein
23
21
 
24
- property :change => :single2array do
25
- self.split(":").last
22
+ property :transcript => :array2single do
23
+ begin
24
+ protein = self.protein
25
+ Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
26
+ end
26
27
  end
28
+ persist :transcript
27
29
 
28
- property :position => :single2array do
29
- if change.match(/[^\d](\d+)[^\d]/)
30
- $1.to_i
31
- else
32
- nil
33
- end
30
+ property :change => :array2single do
31
+ self.collect{|mi| mi.split(":").last}
32
+ end
33
+ persist :change
34
+
35
+ property :position => :array2single do
36
+ change.collect{|c|
37
+ if c.match(/[^\d](\d+)[^\d]/)
38
+ $1.to_i
39
+ else
40
+ nil
41
+ end
42
+ }
34
43
  end
35
-
44
+ persist :position
45
+
36
46
  property :ensembl_protein_image_url => :single2array do
37
47
  ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
38
48
  "http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
39
49
  end
40
-
50
+ persist :ensembl_protein_image_url
51
+
41
52
  property :marked_svg => :single2array do
42
53
  svg = Open.read(protein.ensembl_protein_image_url)
43
54
  seq_len = protein.sequence_length
@@ -56,10 +67,11 @@ module MutatedIsoform
56
67
  svg
57
68
  end
58
69
  end
70
+ persist :marked_svg
59
71
 
60
72
  ASTERISK = "*"[0]
61
73
  CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
62
- property :consecuence => :single2array do
74
+ property :consequence => :single2array do
63
75
  prot, change = self.split(":")
64
76
 
65
77
  case
@@ -79,14 +91,15 @@ module MutatedIsoform
79
91
  "MISS-SENSE"
80
92
  end
81
93
  end
94
+ persist :consequence
82
95
 
83
96
  property :truncated => :array2single do
84
- @truncated ||= begin
85
- protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
86
- self.collect do |isoform_mutation|
97
+ begin
98
+ protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
99
+ self.collect do |isoform_mutation|
87
100
 
88
- next if isoform_mutation.consecuence != "FRAMESHIFT" and isoform_mutation.consecuence != "NONSENSE"
89
- protein = isoform_mutation.protein
101
+ next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
102
+ protein = isoform_mutation.protein
90
103
  position = isoform_mutation.position
91
104
  sequence_length = protein2sequence_length[protein]
92
105
 
@@ -100,80 +113,124 @@ module MutatedIsoform
100
113
  end
101
114
  end
102
115
  end
103
-
104
116
  end
105
-
106
- property :damage_scores => :array2single do
107
- @damage_scores ||= begin
108
- sift_scores.zip(mutation_assessor_scores).collect{|p|
109
- p = p.compact
110
- if p.empty?
111
- nil
112
- else
113
- p.inject(0.0){|acc, e| acc += e} / p.length
114
- end
115
- }
116
- end
117
+ persist :truncated
118
+
119
+ property :damage_scores => :array2single do |*args|
120
+ begin
121
+ methods = args.first
122
+ methods = [:sift, :mutation_assessor] if methods.nil?
123
+ methods = [methods] unless Array === methods
124
+ values = methods.collect{|method|
125
+ case method.to_sym
126
+ when :sift
127
+ sift_scores
128
+ when :mutation_assessor
129
+ mutation_assessor_scores
130
+ else
131
+ raise "Unknown predictive method: #{ method }"
132
+ end
133
+ }
134
+ if values.compact.empty?
135
+ return [nil] * self.length
136
+ else
137
+ scores = values.shift
138
+ scores = scores.zip(*values)
139
+
140
+ scores.collect{|p|
141
+ p = p.compact
142
+ if p.empty?
143
+ nil
144
+ else
145
+ p.inject(0.0){|acc, e| acc += e} / p.length
146
+ end
147
+ }
148
+ end
149
+ end
150
+ end
151
+ persist :damage_scores
152
+
153
+ property :damaged? => :array2single do |*args|
154
+ begin
155
+ methods, threshold = args
156
+ threshold = 0.8 if threshold.nil?
157
+ damage_scores = self.damage_scores(methods)
158
+ truncated = self.truncated
159
+ damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
160
+ end
117
161
  end
162
+ persist :damaged?
118
163
 
119
164
  property :sift_scores => :array2single do
120
- @sift_scores ||= begin
121
- missense = self.select{|iso_mut| iso_mut.consecuence == "MISS-SENSE"}
165
+ begin
166
+ missense = self.select{|iso_mut| iso_mut.consequence == "MISS-SENSE"}
122
167
 
123
- values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
124
- v.nil? ? nil : v["Prediction"]
125
- }
168
+ values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
169
+ v.nil? ? nil : 1.0 - v["Score 1"].to_f
170
+ }
126
171
 
127
- range = {nil => nil,
128
- "" => nil,
129
- "TOLERATED" => 0,
130
- "*DAMAGING" => 1,
131
- "DAMAGING" => 1}
172
+ values
132
173
 
133
- range.values_at *values
134
- end
174
+ #range = {nil => nil,
175
+ # "" => nil,
176
+ # "TOLERATED" => 0,
177
+ # "*DAMAGING" => 1,
178
+ # "DAMAGING" => 1}
179
+
180
+ #range.values_at *values
181
+ end
135
182
  end
183
+ persist :sift_scores
136
184
 
137
185
  property :mutation_assessor_scores => :array2single do
138
- @mutation_assesor_scores ||= begin
139
- missense = self.select{|mutation| mutation.consecuence == "MISS-SENSE"}
140
-
141
- correspondance = {}
142
- list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
143
- prot, change = mutation.split(":")
144
- next if uniprot.nil?
145
- uniprot_change = [uniprot, change]
146
- correspondance[uniprot_change] ||= []
147
- correspondance[uniprot_change] << mutation
148
- uniprot_change
149
- end.compact
150
-
151
- #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
152
- return [nil] * self.length if list.empty?
153
-
154
- tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
155
-
156
- #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
157
- return [nil] * self.length if tsv.empty?
158
-
159
- new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
160
-
161
- tsv.each do |key, values|
162
- correspondance[key.split(" ")].each do |mutation|
163
- new[mutation] = values["Func. Impact"]
164
- end
165
- end
166
-
167
-
168
- range = {nil => nil,
169
- "" => nil,
170
- "neutral" => 0,
171
- "low" => 0.3,
172
- "medium" => 0.6,
173
- "high" => 1}
174
-
175
- range.values_at *new.values_at(*self)
176
- end
186
+ begin
187
+ missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
188
+
189
+ correspondance = {}
190
+ list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
191
+ prot, change = mutation.split(":")
192
+ next if uniprot.nil?
193
+ uniprot_change = [uniprot.upcase, change.upcase]
194
+ correspondance[uniprot_change] ||= []
195
+ correspondance[uniprot_change] << mutation
196
+ uniprot_change
197
+ end.compact
198
+
199
+ #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
200
+ return [nil] * self.length if list.empty?
201
+
202
+ tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
203
+
204
+ #return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
205
+ return [nil] * self.length if tsv.empty?
206
+
207
+ new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
208
+
209
+ tsv.each do |key, values|
210
+ uniprot, change = key.split(" ")
211
+ uniprot_change = [uniprot.upcase, change.upcase]
212
+ correspondance[uniprot_change].each do |mutation|
213
+ new[mutation] = values["Func. Impact"]
214
+ end
215
+ end
216
+
217
+
218
+ range = {nil => nil,
219
+ "" => nil,
220
+ "neutral" => 0,
221
+ "low" => 0.5,
222
+ "medium" => 0.7,
223
+ "high" => 1.0}
224
+
225
+ range.values_at *new.values_at(*self)
226
+ end
177
227
  end
228
+ persist :mutation_assessor_scores
178
229
 
230
+ property :pdbs => :single do
231
+ uniprot = self.transcript.protein.uniprot
232
+ next if uniprot.nil?
233
+ Uniprot.pdbs_covering_aa_position(uniprot, self.position)
234
+ end
235
+ persist :pdbs
179
236
  end
@@ -6,14 +6,24 @@ module PMID
6
6
 
7
7
  self.format = "PMID"
8
8
 
9
+ property :article => :array2single do
10
+ PubMed.get_article(self).values_at(*self)
11
+ end
12
+ persist :article
13
+
9
14
  property :title => :array2single do
10
- @title ||= begin
11
- PubMed.get_article(self).values_at(*self).collect{|article| article.nil? ? nil : article.title}
12
- end
15
+ article.collect{|a| a.nil? ? nil : a.title}
16
+ end
17
+ persist :title
18
+
19
+ property :text => :array2single do
20
+ article.collect{|a| a.nil? ? nil : a.text}
13
21
  end
22
+ persist :text
14
23
 
15
24
  property :pubmed_url => :single2array do
16
25
  "<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
17
26
  end
27
+ persist :pubmed_url
18
28
  end
19
29