rbbt-entities 1.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/icgc2rbbt.rb +23 -0
- data/bin/vcf2rbbt.rb +15 -0
- data/lib/rbbt/entity/chromosome_range.rb +73 -0
- data/lib/rbbt/entity/cnv.rb +20 -2
- data/lib/rbbt/entity/gene.rb +147 -74
- data/lib/rbbt/entity/genomic_mutation.rb +380 -50
- data/lib/rbbt/entity/genotype.rb +10 -4
- data/lib/rbbt/entity/interactor.rb +6 -0
- data/lib/rbbt/entity/mutated_isoform.rb +171 -83
- data/lib/rbbt/entity/pmid.rb +33 -6
- data/lib/rbbt/entity/protein.rb +36 -7
- data/lib/rbbt/entity/transcript.rb +20 -4
- data/lib/rbbt/entity.rb +123 -68
- data/test/rbbt/entity/test_gene.rb +16 -2
- data/test/rbbt/entity/test_genomic_mutation.rb +53 -1
- data/test/rbbt/entity/test_pmid.rb +19 -0
- data/test/rbbt/test_entity.rb +100 -5
- metadata +51 -72
@@ -5,10 +5,12 @@ require 'rbbt/mutation/mutation_assessor'
|
|
5
5
|
require 'rbbt/mutation/sift'
|
6
6
|
require 'rbbt/entity/protein'
|
7
7
|
require 'rbbt/sources/uniprot'
|
8
|
+
require 'rbbt/sources/InterPro'
|
8
9
|
require 'rbbt/entity/gene'
|
9
10
|
require 'nokogiri'
|
10
11
|
|
11
12
|
Workflow.require_workflow 'structure'
|
13
|
+
Workflow.require_workflow 'MutEval'
|
12
14
|
|
13
15
|
module MutatedIsoform
|
14
16
|
extend Entity
|
@@ -16,10 +18,12 @@ module MutatedIsoform
|
|
16
18
|
|
17
19
|
self.format = "Mutated Isoform"
|
18
20
|
|
21
|
+
DEFAULT_DAMAGE_PREDICTORS = [:sift, :mutation_assessor]
|
22
|
+
|
19
23
|
property :protein => :array2single do
|
20
|
-
|
24
|
+
proteins = self.collect{|mutation| mutation.split(":").first if mutation[0..3] == "ENSP"}
|
25
|
+
Protein.setup(proteins, "Ensembl Protein ID", organism)
|
21
26
|
end
|
22
|
-
persist :protein
|
23
27
|
|
24
28
|
property :transcript => :array2single do
|
25
29
|
begin
|
@@ -27,12 +31,10 @@ module MutatedIsoform
|
|
27
31
|
Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
|
28
32
|
end
|
29
33
|
end
|
30
|
-
persist :transcript
|
31
34
|
|
32
35
|
property :change => :array2single do
|
33
36
|
self.collect{|mi| mi.split(":").last}
|
34
37
|
end
|
35
|
-
persist :change
|
36
38
|
|
37
39
|
property :position => :array2single do
|
38
40
|
change.collect{|c|
|
@@ -43,13 +45,11 @@ module MutatedIsoform
|
|
43
45
|
end
|
44
46
|
}
|
45
47
|
end
|
46
|
-
persist :position
|
47
48
|
|
48
49
|
property :ensembl_protein_image_url => :single2array do
|
49
50
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
50
51
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
51
52
|
end
|
52
|
-
persist :ensembl_protein_image_url
|
53
53
|
|
54
54
|
property :marked_svg => :single2array do
|
55
55
|
svg = Open.read(protein.ensembl_protein_image_url)
|
@@ -57,7 +57,6 @@ module MutatedIsoform
|
|
57
57
|
seq_len = protein.sequence_length
|
58
58
|
position = self.position
|
59
59
|
|
60
|
-
|
61
60
|
doc = Nokogiri::XML(svg)
|
62
61
|
return nil unless doc.css('svg')
|
63
62
|
width = doc.css('svg').first.attr('width').to_f
|
@@ -71,14 +70,17 @@ module MutatedIsoform
|
|
71
70
|
svg
|
72
71
|
end
|
73
72
|
end
|
74
|
-
persist :marked_svg
|
75
73
|
|
76
74
|
ASTERISK = "*"[0]
|
77
75
|
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
78
76
|
property :consequence => :single2array do
|
77
|
+
return nil if self.nil?
|
78
|
+
|
79
79
|
prot, change = self.split(":")
|
80
80
|
|
81
81
|
case
|
82
|
+
when change.nil?
|
83
|
+
nil
|
82
84
|
when change =~ /UTR/
|
83
85
|
"UTR"
|
84
86
|
when (change[0] == ASTERISK and not change[0] == change[-1])
|
@@ -95,37 +97,111 @@ module MutatedIsoform
|
|
95
97
|
"MISS-SENSE"
|
96
98
|
end
|
97
99
|
end
|
98
|
-
|
100
|
+
|
101
|
+
property :in_utr => :array2single do
|
102
|
+
consequence.collect{|c|
|
103
|
+
c == "UTR"
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
property :synonymous => :array2single do
|
108
|
+
consequence.collect{|c|
|
109
|
+
c == "SYNONYMOUS"
|
110
|
+
}
|
111
|
+
end
|
112
|
+
|
113
|
+
property :non_synonymous => :array2single do
|
114
|
+
consequence.collect{|c|
|
115
|
+
not c.nil? and c != "SYNONYMOUS" and c != "UTR"
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
119
|
+
property :affected_interpro_domains => :single do
|
120
|
+
if protein.nil?
|
121
|
+
[]
|
122
|
+
else
|
123
|
+
InterProDomain.setup(Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
124
|
+
e.to_i > position and s.to_i < position
|
125
|
+
}.collect{|d,s,e| d }, organism)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
property :affected_interpro_domain_positions => :single do
|
130
|
+
if protein.nil?
|
131
|
+
[]
|
132
|
+
else
|
133
|
+
Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
134
|
+
e.to_i > position and s.to_i < position
|
135
|
+
}.collect{|d,s,e| [d, position - s.to_i, s.to_i, e.to_i]}
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
property :affected_domain_positions => :single do
|
140
|
+
affected_interpro_domain_positions
|
141
|
+
end
|
142
|
+
|
143
|
+
property :affected_domains => :single do
|
144
|
+
affected_interpro_domains
|
145
|
+
end
|
146
|
+
|
147
|
+
property :ablated_interpro_domains => :single do
|
148
|
+
if protein.nil?
|
149
|
+
[]
|
150
|
+
else
|
151
|
+
InterProDomain.setup(Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
152
|
+
e.to_i > position
|
153
|
+
}.collect{|d,s,e| d }, organism)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
property :ablated_interpro_domain_positions => :single do
|
158
|
+
if protein.nil?
|
159
|
+
[]
|
160
|
+
else
|
161
|
+
Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
162
|
+
e.to_i > position
|
163
|
+
}.collect{|d,s,e| [d, s.to_i, e.to_i]}
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
property :ablated_domain_positions => :single do
|
168
|
+
ablated_interpro_domain_positions
|
169
|
+
end
|
170
|
+
|
171
|
+
property :ablated_domains => :single do
|
172
|
+
ablated_interpro_domains
|
173
|
+
end
|
99
174
|
|
100
175
|
property :truncated => :array2single do
|
101
176
|
begin
|
102
177
|
proteins = self.protein.compact.flatten
|
103
|
-
protein2sequence_length = Misc.process_to_hash(proteins){|list| proteins.sequence_length}
|
178
|
+
protein2sequence_length = Misc.process_to_hash(proteins){|list| proteins.any? ? proteins.sequence_length : []}
|
104
179
|
|
105
180
|
self.collect do |isoform_mutation|
|
106
181
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
182
|
+
next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
|
183
|
+
protein = isoform_mutation.protein
|
184
|
+
position = isoform_mutation.position
|
185
|
+
sequence_length = protein2sequence_length[protein]
|
186
|
+
|
187
|
+
case
|
188
|
+
when (sequence_length.nil? or position.nil?)
|
189
|
+
nil
|
190
|
+
when position < sequence_length.to_f * 0.7
|
191
|
+
true
|
192
|
+
when (isoform_mutation.ablated_domains.any?)
|
193
|
+
true
|
194
|
+
else
|
195
|
+
false
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
122
199
|
end
|
123
|
-
persist :truncated
|
124
200
|
|
125
201
|
property :damage_scores => :array2single do |*args|
|
126
202
|
begin
|
127
203
|
methods = args.first
|
128
|
-
methods =
|
204
|
+
methods = MutatedIsoform::DEFAULT_DAMAGE_PREDICTORS if methods.nil?
|
129
205
|
methods = [methods] unless Array === methods
|
130
206
|
values = methods.collect{|method|
|
131
207
|
case method.to_sym
|
@@ -133,6 +209,12 @@ module MutatedIsoform
|
|
133
209
|
sift_scores
|
134
210
|
when :mutation_assessor
|
135
211
|
mutation_assessor_scores
|
212
|
+
when :polyphen
|
213
|
+
polyphen_scores
|
214
|
+
when :snps_and_go
|
215
|
+
snps_and_go_scores
|
216
|
+
when :transFIC
|
217
|
+
transFIC_scores(:mutation_assessor)
|
136
218
|
else
|
137
219
|
raise "Unknown predictive method: #{ method }"
|
138
220
|
end
|
@@ -154,94 +236,100 @@ module MutatedIsoform
|
|
154
236
|
end
|
155
237
|
end
|
156
238
|
end
|
157
|
-
persist :damage_scores
|
158
239
|
|
159
240
|
property :damaged? => :array2single do |*args|
|
160
241
|
begin
|
161
242
|
methods, threshold = args
|
243
|
+
threshold, methods = methods, nil if threshold.nil? and not Array === methods
|
162
244
|
threshold = 0.8 if threshold.nil?
|
163
245
|
damage_scores = self.damage_scores(methods)
|
164
246
|
truncated = self.truncated
|
247
|
+
|
165
248
|
damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
|
166
249
|
end
|
167
250
|
end
|
168
|
-
persist :damaged?
|
169
251
|
|
170
|
-
property :
|
252
|
+
property :snps_and_go_scores => :array2single do
|
171
253
|
begin
|
172
|
-
missense = self.select{|
|
173
|
-
|
174
|
-
|
175
|
-
|
254
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
255
|
+
res = MutEval.job(:snps_and_go, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
256
|
+
res.values_at(*self).collect{|v| (v.nil? or v["SNPSandGO Score"].nil? or v["SNPSandGO Score"].empty?) ?
|
257
|
+
nil :
|
258
|
+
(v["SNPSandGO Prediction"] == "Disease" ? 1.0 - (10.0 - v["SNPSandGO Score"].to_f) / 20 : 0 + (10.0 - v["SNPSandGO Score"].to_f) / 20)
|
176
259
|
}
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
#range = {nil => nil,
|
181
|
-
# "" => nil,
|
182
|
-
# "TOLERATED" => 0,
|
183
|
-
# "*DAMAGING" => 1,
|
184
|
-
# "DAMAGING" => 1}
|
185
|
-
|
186
|
-
#range.values_at *values
|
260
|
+
rescue
|
261
|
+
Log.warn $!.message
|
262
|
+
[nil] * self.length
|
187
263
|
end
|
188
264
|
end
|
189
|
-
persist :sift_scores
|
190
265
|
|
191
|
-
property :
|
266
|
+
property :polyphen_scores => :array2single do
|
192
267
|
begin
|
193
268
|
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
269
|
+
res = MutEval.job(:polyphen, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
270
|
+
res.values_at(*self).collect{|v| (v.nil? or v["Polyphen Score"].nil? or v["Polyphen Score"].empty?) ? nil : v["Polyphen Score"].to_f / 10}
|
271
|
+
rescue
|
272
|
+
Log.warn $!.message
|
273
|
+
[nil] * self.length
|
274
|
+
end
|
275
|
+
end
|
194
276
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
return [nil] * self.length if list.empty?
|
206
|
-
|
207
|
-
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
208
|
-
|
209
|
-
return [nil] * self.length if tsv.empty?
|
277
|
+
property :sift_scores => :array2single do
|
278
|
+
begin
|
279
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
280
|
+
res = MutEval.job(:sift, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
281
|
+
res.values_at(*self).collect{|v| (v.nil? or v["SIFT Score"].nil? or v["SIFT Score"].empty?) ? nil : 1.0 - v["SIFT Score"].to_f}
|
282
|
+
rescue
|
283
|
+
Log.warn $!.message
|
284
|
+
[nil] * self.length
|
285
|
+
end
|
286
|
+
end
|
210
287
|
|
211
|
-
|
288
|
+
property :transFIC_scores => :array2single do |*args|
|
289
|
+
method = args.first || :mutation_assessor
|
290
|
+
range = {nil => nil,
|
291
|
+
"" => nil,
|
292
|
+
"low_impact" => 0,
|
293
|
+
"medium_impact" => 0.7,
|
294
|
+
"high_impact" => 1.0}
|
295
|
+
field_names = {
|
296
|
+
}
|
297
|
+
begin
|
298
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
212
299
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
if correspondance.include? uniprot_change
|
218
|
-
correspondance[uniprot_change].each do |mutation|
|
219
|
-
new[mutation] = values["Func. Impact"]
|
220
|
-
end
|
221
|
-
else
|
222
|
-
Log.medium "Correspondace value missing: #{uniprot_change.inspect}"
|
223
|
-
end
|
224
|
-
end
|
300
|
+
field_name = {
|
301
|
+
:mutation_assessor => "maTransfic",
|
302
|
+
}[method.to_sym]
|
225
303
|
|
304
|
+
MutEval.job(:transFIC, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run.values_at(*self).collect{|v| (v.nil? or v[field_name].nil? or v[field_name].empty?) ? nil : v[field_name].to_f}
|
305
|
+
rescue
|
306
|
+
Log.warn $!.message
|
307
|
+
[nil] * self.length
|
308
|
+
end
|
309
|
+
end
|
226
310
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
311
|
+
property :mutation_assessor_scores => :array2single do
|
312
|
+
range = {nil => nil,
|
313
|
+
"" => nil,
|
314
|
+
"neutral" => 0,
|
315
|
+
"low" => 0.5,
|
316
|
+
"medium" => 0.7,
|
317
|
+
"high" => 1.0}
|
233
318
|
|
234
|
-
|
319
|
+
begin
|
320
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
321
|
+
MutEval.job(:mutation_assessor, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run.values_at(*self).collect{|v| (v.nil? or v["Mutation Assessor Prediction"].nil? or v["Mutation Assessor Prediction"].empty?) ? nil : range[v["Mutation Assessor Prediction"]]}
|
322
|
+
rescue
|
323
|
+
Log.warn $!.message
|
324
|
+
[nil] * self.length
|
235
325
|
end
|
236
326
|
end
|
237
|
-
persist :mutation_assessor_scores
|
238
327
|
|
239
328
|
property :pdbs => :single do
|
240
329
|
uniprot = self.transcript.protein.uniprot
|
241
330
|
next if uniprot.nil?
|
242
331
|
UniProt.pdbs_covering_aa_position(uniprot, self.position)
|
243
332
|
end
|
244
|
-
persist :pdbs
|
245
333
|
|
246
334
|
property :pdbs_and_positions => :single do
|
247
335
|
pdbs.collect do |pdb, info|
|
data/lib/rbbt/entity/pmid.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
require 'rbbt/entity'
|
2
2
|
require 'rbbt/entity/document'
|
3
3
|
require 'rbbt/sources/pubmed'
|
4
|
+
require 'rbbt/sources/gscholar'
|
4
5
|
|
5
6
|
module PMID
|
6
7
|
extend Entity
|
7
8
|
include Document
|
8
9
|
|
10
|
+
self.annotation :default_type
|
11
|
+
|
9
12
|
self.format = "PMID"
|
10
13
|
|
14
|
+
property :docid => :single do |*args|
|
15
|
+
type = args.first || default_type
|
16
|
+
["PMID", self, type].compact * ":"
|
17
|
+
end
|
18
|
+
|
11
19
|
property :article => :array2single do
|
12
20
|
PubMed.get_article(self).values_at(*self)
|
13
21
|
end
|
@@ -15,25 +23,44 @@ module PMID
|
|
15
23
|
property :abstract => :array2single do
|
16
24
|
article.collect{|a| a.nil? ? nil : a.abstract}
|
17
25
|
end
|
18
|
-
persist :abstract
|
19
26
|
|
20
27
|
property :title => :array2single do
|
21
28
|
article.collect{|a| a.nil? ? nil : a.title}
|
22
29
|
end
|
23
|
-
persist :title
|
24
30
|
|
25
31
|
property :journal => :array2single do
|
26
32
|
article.collect{|a| a.nil? ? nil : a.journal}
|
27
33
|
end
|
28
|
-
persist :journal
|
29
34
|
|
30
35
|
property :year => :array2single do
|
31
36
|
article.collect{|a| a.nil? ? nil : a.year}
|
32
37
|
end
|
33
|
-
persist :year
|
34
38
|
|
35
|
-
property :
|
36
|
-
|
39
|
+
property :cites => :single2array do
|
40
|
+
if title
|
41
|
+
begin
|
42
|
+
GoogleScholar.number_cites(title)
|
43
|
+
rescue
|
44
|
+
nil
|
45
|
+
end
|
46
|
+
else
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
property :_get_text => :array2single do |*args|
|
52
|
+
type = args.first || default_type
|
53
|
+
|
54
|
+
case type.to_s
|
55
|
+
when "full_text", 'fulltext'
|
56
|
+
article.collect{|a| a.nil? ? nil : a.full_text}
|
57
|
+
when "abstract"
|
58
|
+
article.collect{|a| a.nil? ? nil : a.abstract }
|
59
|
+
when "best"
|
60
|
+
article.collect{|a| a.nil? ? nil : (a.full_text || a.text) }
|
61
|
+
else
|
62
|
+
article.collect{|a| a.nil? ? nil : a.text}
|
63
|
+
end
|
37
64
|
end
|
38
65
|
|
39
66
|
property :pubmed_url => :single2array do
|
data/lib/rbbt/entity/protein.rb
CHANGED
@@ -19,7 +19,7 @@ module Protein
|
|
19
19
|
|
20
20
|
def self.ensp2sequence(organism, protein)
|
21
21
|
@@ensp2sequence ||= {}
|
22
|
-
@@ensp2sequence[organism] ||= Organism.protein_sequence(organism).tsv :persist => true
|
22
|
+
@@ensp2sequence[organism] ||= Organism.protein_sequence(organism).tsv :persist => true, :unnamed => true
|
23
23
|
if Array === protein
|
24
24
|
@@ensp2sequence[organism].values_at *protein
|
25
25
|
else
|
@@ -29,7 +29,7 @@ module Protein
|
|
29
29
|
|
30
30
|
def self.ensp2enst(organism, protein)
|
31
31
|
@@ensp2enst ||= {}
|
32
|
-
@@ensp2enst[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Protein ID", :fields => ["Ensembl Transcript ID"], :persist => true)
|
32
|
+
@@ensp2enst[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Protein ID", :fields => ["Ensembl Transcript ID"], :persist => true, :unnamed => true)
|
33
33
|
@@ensp2enst[organism][protein]
|
34
34
|
end
|
35
35
|
|
@@ -51,6 +51,7 @@ module Protein
|
|
51
51
|
persist :transcript
|
52
52
|
|
53
53
|
property :ensembl_protein_image_url => :single2array do
|
54
|
+
organism = self.organism || "Hsa"
|
54
55
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
55
56
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{ensembl};_rmd=d2a8;export=svg"
|
56
57
|
end
|
@@ -67,13 +68,12 @@ module Protein
|
|
67
68
|
end
|
68
69
|
|
69
70
|
property :gene => :array do
|
70
|
-
Gene.setup(to("Ensembl Protein ID").clean_annotations, "Ensembl Protein ID", organism)
|
71
|
+
Gene.setup(to("Ensembl Protein ID").clean_annotations, "Ensembl Protein ID", organism).ensembl
|
71
72
|
end
|
72
73
|
persist :gene #, :yaml, :file => '/tmp/testes'
|
73
74
|
|
74
75
|
property :pfam => :array2single do
|
75
|
-
index = Organism.gene_pfam(organism).tsv :flat, :persist => true
|
76
|
-
index.unnamed = true
|
76
|
+
index = Organism.gene_pfam(organism).tsv :flat, :persist => true, :unnamed => true
|
77
77
|
pfam = index.values_at(*self).flatten
|
78
78
|
Pfam.setup pfam
|
79
79
|
end
|
@@ -82,12 +82,41 @@ module Protein
|
|
82
82
|
property :sequence => :array2single do
|
83
83
|
Protein.ensp2sequence(organism, self.ensembl)
|
84
84
|
end
|
85
|
-
persist :
|
85
|
+
persist :_ary_sequence
|
86
86
|
|
87
87
|
property :sequence_length => :array2single do
|
88
88
|
sequence.collect{|seq| seq.nil? ? nil : seq.length}
|
89
89
|
end
|
90
|
-
persist :
|
90
|
+
persist :_ary_sequence_length
|
91
|
+
|
92
|
+
property :marked_svg => :single2array do |*args|
|
93
|
+
positions = args.first
|
94
|
+
svg = Open.read(ensembl_protein_image_url)
|
95
|
+
|
96
|
+
seq_len = sequence_length
|
97
|
+
|
98
|
+
doc = Nokogiri::XML(svg)
|
99
|
+
return nil unless doc.css('svg').any?
|
100
|
+
width = doc.css('svg').first.attr('width').to_f
|
101
|
+
height = doc.css('svg').first.attr('height').to_f
|
102
|
+
start = doc.css('rect.ac').first.attr('x').to_f
|
103
|
+
|
104
|
+
positions.each do |position|
|
105
|
+
if width and height and start and seq_len and position
|
106
|
+
offset = (width - start)/seq_len * position + start + rand * 10
|
107
|
+
svg = svg.sub(/<\/svg>/,"<rect x='#{offset}' y='1' width='1' height='#{height}' style='fill:rgb(255,0,0);opacity:0.5;stroke:none;'></svg>")
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
svg
|
112
|
+
end
|
113
|
+
|
114
|
+
property :pdbs => :single do
|
115
|
+
next if uniprot.nil?
|
116
|
+
UniProt.pdbs(uniprot)
|
117
|
+
end
|
118
|
+
persist :pdbs
|
119
|
+
|
91
120
|
|
92
121
|
end
|
93
122
|
|
@@ -10,7 +10,7 @@ module Transcript
|
|
10
10
|
|
11
11
|
def self.enst2ensg(organism, transcript)
|
12
12
|
@@enst2ensg ||= {}
|
13
|
-
@@enst2ensg[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Gene ID"], :persist => true
|
13
|
+
@@enst2ensg[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Gene ID"], :persist => true, :unnamed => true)
|
14
14
|
res = if Array === transcript
|
15
15
|
@@enst2ensg[organism].values_at *transcript
|
16
16
|
else
|
@@ -22,7 +22,7 @@ module Transcript
|
|
22
22
|
|
23
23
|
def self.enst2ensp(organism, transcript)
|
24
24
|
@@enst2ensp ||= {}
|
25
|
-
@@enst2ensp[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Protein ID"], :persist => true)
|
25
|
+
@@enst2ensp[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Protein ID"], :persist => true, :unnamed => true)
|
26
26
|
res = if Array === transcript
|
27
27
|
@@enst2ensp[organism].values_at *transcript
|
28
28
|
else
|
@@ -31,6 +31,18 @@ module Transcript
|
|
31
31
|
Protein.setup(res, "Ensembl Protein ID", organism)
|
32
32
|
end
|
33
33
|
|
34
|
+
def self.enst2ense(organism, transcript)
|
35
|
+
@@enst2ense ||= {}
|
36
|
+
@@enst2ense[organism] ||= Organism.transcript_exons(organism).tsv(:persist => true, :fields => "Ensembl Exon ID", :unnamed => true)
|
37
|
+
res = if Array === transcript
|
38
|
+
@@enst2ense[organism].chunked_values_at transcript
|
39
|
+
else
|
40
|
+
@@enst2ense[organism][transcript]
|
41
|
+
end
|
42
|
+
res
|
43
|
+
end
|
44
|
+
|
45
|
+
|
34
46
|
|
35
47
|
property :to! => :array2single do |new_format|
|
36
48
|
return self if format == new_format
|
@@ -44,13 +56,17 @@ module Transcript
|
|
44
56
|
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
45
57
|
end
|
46
58
|
|
59
|
+
property :exons => :array2single do
|
60
|
+
Transcript.enst2ense(organism, self)
|
61
|
+
end
|
62
|
+
persist :_ary_exons
|
63
|
+
|
47
64
|
property :ensembl => :array2single do
|
48
65
|
to "Ensembl Transcript ID"
|
49
66
|
end
|
50
67
|
|
51
68
|
property :sequence => :array2single do
|
52
|
-
transcript_sequence = Organism.transcript_sequence(organism).tsv :persist => true
|
53
|
-
transcript_sequence.unnamed = true
|
69
|
+
transcript_sequence = Organism.transcript_sequence(organism).tsv :persist => true, :unnamed => true
|
54
70
|
transcript_sequence.values_at *self.ensembl
|
55
71
|
end
|
56
72
|
|