rbbt-entities 1.1.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/icgc2rbbt.rb +23 -0
- data/bin/vcf2rbbt.rb +15 -0
- data/lib/rbbt/entity/chromosome_range.rb +73 -0
- data/lib/rbbt/entity/cnv.rb +20 -2
- data/lib/rbbt/entity/gene.rb +147 -74
- data/lib/rbbt/entity/genomic_mutation.rb +380 -50
- data/lib/rbbt/entity/genotype.rb +10 -4
- data/lib/rbbt/entity/interactor.rb +6 -0
- data/lib/rbbt/entity/mutated_isoform.rb +171 -83
- data/lib/rbbt/entity/pmid.rb +33 -6
- data/lib/rbbt/entity/protein.rb +36 -7
- data/lib/rbbt/entity/transcript.rb +20 -4
- data/lib/rbbt/entity.rb +123 -68
- data/test/rbbt/entity/test_gene.rb +16 -2
- data/test/rbbt/entity/test_genomic_mutation.rb +53 -1
- data/test/rbbt/entity/test_pmid.rb +19 -0
- data/test/rbbt/test_entity.rb +100 -5
- metadata +51 -72
@@ -5,10 +5,12 @@ require 'rbbt/mutation/mutation_assessor'
|
|
5
5
|
require 'rbbt/mutation/sift'
|
6
6
|
require 'rbbt/entity/protein'
|
7
7
|
require 'rbbt/sources/uniprot'
|
8
|
+
require 'rbbt/sources/InterPro'
|
8
9
|
require 'rbbt/entity/gene'
|
9
10
|
require 'nokogiri'
|
10
11
|
|
11
12
|
Workflow.require_workflow 'structure'
|
13
|
+
Workflow.require_workflow 'MutEval'
|
12
14
|
|
13
15
|
module MutatedIsoform
|
14
16
|
extend Entity
|
@@ -16,10 +18,12 @@ module MutatedIsoform
|
|
16
18
|
|
17
19
|
self.format = "Mutated Isoform"
|
18
20
|
|
21
|
+
DEFAULT_DAMAGE_PREDICTORS = [:sift, :mutation_assessor]
|
22
|
+
|
19
23
|
property :protein => :array2single do
|
20
|
-
|
24
|
+
proteins = self.collect{|mutation| mutation.split(":").first if mutation[0..3] == "ENSP"}
|
25
|
+
Protein.setup(proteins, "Ensembl Protein ID", organism)
|
21
26
|
end
|
22
|
-
persist :protein
|
23
27
|
|
24
28
|
property :transcript => :array2single do
|
25
29
|
begin
|
@@ -27,12 +31,10 @@ module MutatedIsoform
|
|
27
31
|
Transcript.setup(protein.transcript.zip(self.collect{|mutation| mutation.split(":").first}).collect{|p| p.compact.first}, "Ensembl Transcript ID", organism)
|
28
32
|
end
|
29
33
|
end
|
30
|
-
persist :transcript
|
31
34
|
|
32
35
|
property :change => :array2single do
|
33
36
|
self.collect{|mi| mi.split(":").last}
|
34
37
|
end
|
35
|
-
persist :change
|
36
38
|
|
37
39
|
property :position => :array2single do
|
38
40
|
change.collect{|c|
|
@@ -43,13 +45,11 @@ module MutatedIsoform
|
|
43
45
|
end
|
44
46
|
}
|
45
47
|
end
|
46
|
-
persist :position
|
47
48
|
|
48
49
|
property :ensembl_protein_image_url => :single2array do
|
49
50
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
50
51
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
51
52
|
end
|
52
|
-
persist :ensembl_protein_image_url
|
53
53
|
|
54
54
|
property :marked_svg => :single2array do
|
55
55
|
svg = Open.read(protein.ensembl_protein_image_url)
|
@@ -57,7 +57,6 @@ module MutatedIsoform
|
|
57
57
|
seq_len = protein.sequence_length
|
58
58
|
position = self.position
|
59
59
|
|
60
|
-
|
61
60
|
doc = Nokogiri::XML(svg)
|
62
61
|
return nil unless doc.css('svg')
|
63
62
|
width = doc.css('svg').first.attr('width').to_f
|
@@ -71,14 +70,17 @@ module MutatedIsoform
|
|
71
70
|
svg
|
72
71
|
end
|
73
72
|
end
|
74
|
-
persist :marked_svg
|
75
73
|
|
76
74
|
ASTERISK = "*"[0]
|
77
75
|
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
78
76
|
property :consequence => :single2array do
|
77
|
+
return nil if self.nil?
|
78
|
+
|
79
79
|
prot, change = self.split(":")
|
80
80
|
|
81
81
|
case
|
82
|
+
when change.nil?
|
83
|
+
nil
|
82
84
|
when change =~ /UTR/
|
83
85
|
"UTR"
|
84
86
|
when (change[0] == ASTERISK and not change[0] == change[-1])
|
@@ -95,37 +97,111 @@ module MutatedIsoform
|
|
95
97
|
"MISS-SENSE"
|
96
98
|
end
|
97
99
|
end
|
98
|
-
|
100
|
+
|
101
|
+
property :in_utr => :array2single do
|
102
|
+
consequence.collect{|c|
|
103
|
+
c == "UTR"
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
property :synonymous => :array2single do
|
108
|
+
consequence.collect{|c|
|
109
|
+
c == "SYNONYMOUS"
|
110
|
+
}
|
111
|
+
end
|
112
|
+
|
113
|
+
property :non_synonymous => :array2single do
|
114
|
+
consequence.collect{|c|
|
115
|
+
not c.nil? and c != "SYNONYMOUS" and c != "UTR"
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
119
|
+
property :affected_interpro_domains => :single do
|
120
|
+
if protein.nil?
|
121
|
+
[]
|
122
|
+
else
|
123
|
+
InterProDomain.setup(Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
124
|
+
e.to_i > position and s.to_i < position
|
125
|
+
}.collect{|d,s,e| d }, organism)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
property :affected_interpro_domain_positions => :single do
|
130
|
+
if protein.nil?
|
131
|
+
[]
|
132
|
+
else
|
133
|
+
Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
134
|
+
e.to_i > position and s.to_i < position
|
135
|
+
}.collect{|d,s,e| [d, position - s.to_i, s.to_i, e.to_i]}
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
property :affected_domain_positions => :single do
|
140
|
+
affected_interpro_domain_positions
|
141
|
+
end
|
142
|
+
|
143
|
+
property :affected_domains => :single do
|
144
|
+
affected_interpro_domains
|
145
|
+
end
|
146
|
+
|
147
|
+
property :ablated_interpro_domains => :single do
|
148
|
+
if protein.nil?
|
149
|
+
[]
|
150
|
+
else
|
151
|
+
InterProDomain.setup(Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
152
|
+
e.to_i > position
|
153
|
+
}.collect{|d,s,e| d }, organism)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
property :ablated_interpro_domain_positions => :single do
|
158
|
+
if protein.nil?
|
159
|
+
[]
|
160
|
+
else
|
161
|
+
Misc.zip_fields(protein.interpro_domain_positions || []).select{|d,s,e|
|
162
|
+
e.to_i > position
|
163
|
+
}.collect{|d,s,e| [d, s.to_i, e.to_i]}
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
property :ablated_domain_positions => :single do
|
168
|
+
ablated_interpro_domain_positions
|
169
|
+
end
|
170
|
+
|
171
|
+
property :ablated_domains => :single do
|
172
|
+
ablated_interpro_domains
|
173
|
+
end
|
99
174
|
|
100
175
|
property :truncated => :array2single do
|
101
176
|
begin
|
102
177
|
proteins = self.protein.compact.flatten
|
103
|
-
protein2sequence_length = Misc.process_to_hash(proteins){|list| proteins.sequence_length}
|
178
|
+
protein2sequence_length = Misc.process_to_hash(proteins){|list| proteins.any? ? proteins.sequence_length : []}
|
104
179
|
|
105
180
|
self.collect do |isoform_mutation|
|
106
181
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
182
|
+
next if isoform_mutation.consequence != "FRAMESHIFT" and isoform_mutation.consequence != "NONSENSE"
|
183
|
+
protein = isoform_mutation.protein
|
184
|
+
position = isoform_mutation.position
|
185
|
+
sequence_length = protein2sequence_length[protein]
|
186
|
+
|
187
|
+
case
|
188
|
+
when (sequence_length.nil? or position.nil?)
|
189
|
+
nil
|
190
|
+
when position < sequence_length.to_f * 0.7
|
191
|
+
true
|
192
|
+
when (isoform_mutation.ablated_domains.any?)
|
193
|
+
true
|
194
|
+
else
|
195
|
+
false
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
122
199
|
end
|
123
|
-
persist :truncated
|
124
200
|
|
125
201
|
property :damage_scores => :array2single do |*args|
|
126
202
|
begin
|
127
203
|
methods = args.first
|
128
|
-
methods =
|
204
|
+
methods = MutatedIsoform::DEFAULT_DAMAGE_PREDICTORS if methods.nil?
|
129
205
|
methods = [methods] unless Array === methods
|
130
206
|
values = methods.collect{|method|
|
131
207
|
case method.to_sym
|
@@ -133,6 +209,12 @@ module MutatedIsoform
|
|
133
209
|
sift_scores
|
134
210
|
when :mutation_assessor
|
135
211
|
mutation_assessor_scores
|
212
|
+
when :polyphen
|
213
|
+
polyphen_scores
|
214
|
+
when :snps_and_go
|
215
|
+
snps_and_go_scores
|
216
|
+
when :transFIC
|
217
|
+
transFIC_scores(:mutation_assessor)
|
136
218
|
else
|
137
219
|
raise "Unknown predictive method: #{ method }"
|
138
220
|
end
|
@@ -154,94 +236,100 @@ module MutatedIsoform
|
|
154
236
|
end
|
155
237
|
end
|
156
238
|
end
|
157
|
-
persist :damage_scores
|
158
239
|
|
159
240
|
property :damaged? => :array2single do |*args|
|
160
241
|
begin
|
161
242
|
methods, threshold = args
|
243
|
+
threshold, methods = methods, nil if threshold.nil? and not Array === methods
|
162
244
|
threshold = 0.8 if threshold.nil?
|
163
245
|
damage_scores = self.damage_scores(methods)
|
164
246
|
truncated = self.truncated
|
247
|
+
|
165
248
|
damage_scores.zip(truncated).collect{|damage, truncated| truncated or (not damage.nil? and damage > threshold) }
|
166
249
|
end
|
167
250
|
end
|
168
|
-
persist :damaged?
|
169
251
|
|
170
|
-
property :
|
252
|
+
property :snps_and_go_scores => :array2single do
|
171
253
|
begin
|
172
|
-
missense = self.select{|
|
173
|
-
|
174
|
-
|
175
|
-
|
254
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
255
|
+
res = MutEval.job(:snps_and_go, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
256
|
+
res.values_at(*self).collect{|v| (v.nil? or v["SNPSandGO Score"].nil? or v["SNPSandGO Score"].empty?) ?
|
257
|
+
nil :
|
258
|
+
(v["SNPSandGO Prediction"] == "Disease" ? 1.0 - (10.0 - v["SNPSandGO Score"].to_f) / 20 : 0 + (10.0 - v["SNPSandGO Score"].to_f) / 20)
|
176
259
|
}
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
#range = {nil => nil,
|
181
|
-
# "" => nil,
|
182
|
-
# "TOLERATED" => 0,
|
183
|
-
# "*DAMAGING" => 1,
|
184
|
-
# "DAMAGING" => 1}
|
185
|
-
|
186
|
-
#range.values_at *values
|
260
|
+
rescue
|
261
|
+
Log.warn $!.message
|
262
|
+
[nil] * self.length
|
187
263
|
end
|
188
264
|
end
|
189
|
-
persist :sift_scores
|
190
265
|
|
191
|
-
property :
|
266
|
+
property :polyphen_scores => :array2single do
|
192
267
|
begin
|
193
268
|
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
269
|
+
res = MutEval.job(:polyphen, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
270
|
+
res.values_at(*self).collect{|v| (v.nil? or v["Polyphen Score"].nil? or v["Polyphen Score"].empty?) ? nil : v["Polyphen Score"].to_f / 10}
|
271
|
+
rescue
|
272
|
+
Log.warn $!.message
|
273
|
+
[nil] * self.length
|
274
|
+
end
|
275
|
+
end
|
194
276
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
return [nil] * self.length if list.empty?
|
206
|
-
|
207
|
-
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
208
|
-
|
209
|
-
return [nil] * self.length if tsv.empty?
|
277
|
+
property :sift_scores => :array2single do
|
278
|
+
begin
|
279
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
280
|
+
res = MutEval.job(:sift, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run
|
281
|
+
res.values_at(*self).collect{|v| (v.nil? or v["SIFT Score"].nil? or v["SIFT Score"].empty?) ? nil : 1.0 - v["SIFT Score"].to_f}
|
282
|
+
rescue
|
283
|
+
Log.warn $!.message
|
284
|
+
[nil] * self.length
|
285
|
+
end
|
286
|
+
end
|
210
287
|
|
211
|
-
|
288
|
+
property :transFIC_scores => :array2single do |*args|
|
289
|
+
method = args.first || :mutation_assessor
|
290
|
+
range = {nil => nil,
|
291
|
+
"" => nil,
|
292
|
+
"low_impact" => 0,
|
293
|
+
"medium_impact" => 0.7,
|
294
|
+
"high_impact" => 1.0}
|
295
|
+
field_names = {
|
296
|
+
}
|
297
|
+
begin
|
298
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
212
299
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
if correspondance.include? uniprot_change
|
218
|
-
correspondance[uniprot_change].each do |mutation|
|
219
|
-
new[mutation] = values["Func. Impact"]
|
220
|
-
end
|
221
|
-
else
|
222
|
-
Log.medium "Correspondace value missing: #{uniprot_change.inspect}"
|
223
|
-
end
|
224
|
-
end
|
300
|
+
field_name = {
|
301
|
+
:mutation_assessor => "maTransfic",
|
302
|
+
}[method.to_sym]
|
225
303
|
|
304
|
+
MutEval.job(:transFIC, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run.values_at(*self).collect{|v| (v.nil? or v[field_name].nil? or v[field_name].empty?) ? nil : v[field_name].to_f}
|
305
|
+
rescue
|
306
|
+
Log.warn $!.message
|
307
|
+
[nil] * self.length
|
308
|
+
end
|
309
|
+
end
|
226
310
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
311
|
+
property :mutation_assessor_scores => :array2single do
|
312
|
+
range = {nil => nil,
|
313
|
+
"" => nil,
|
314
|
+
"neutral" => 0,
|
315
|
+
"low" => 0.5,
|
316
|
+
"medium" => 0.7,
|
317
|
+
"high" => 1.0}
|
233
318
|
|
234
|
-
|
319
|
+
begin
|
320
|
+
missense = self.select{|mutation| mutation.consequence == "MISS-SENSE"}
|
321
|
+
MutEval.job(:mutation_assessor, "MutatedIsoforms (#{self.length})", :mutations => missense.sort, :organism => organism).run.values_at(*self).collect{|v| (v.nil? or v["Mutation Assessor Prediction"].nil? or v["Mutation Assessor Prediction"].empty?) ? nil : range[v["Mutation Assessor Prediction"]]}
|
322
|
+
rescue
|
323
|
+
Log.warn $!.message
|
324
|
+
[nil] * self.length
|
235
325
|
end
|
236
326
|
end
|
237
|
-
persist :mutation_assessor_scores
|
238
327
|
|
239
328
|
property :pdbs => :single do
|
240
329
|
uniprot = self.transcript.protein.uniprot
|
241
330
|
next if uniprot.nil?
|
242
331
|
UniProt.pdbs_covering_aa_position(uniprot, self.position)
|
243
332
|
end
|
244
|
-
persist :pdbs
|
245
333
|
|
246
334
|
property :pdbs_and_positions => :single do
|
247
335
|
pdbs.collect do |pdb, info|
|
data/lib/rbbt/entity/pmid.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
require 'rbbt/entity'
|
2
2
|
require 'rbbt/entity/document'
|
3
3
|
require 'rbbt/sources/pubmed'
|
4
|
+
require 'rbbt/sources/gscholar'
|
4
5
|
|
5
6
|
module PMID
|
6
7
|
extend Entity
|
7
8
|
include Document
|
8
9
|
|
10
|
+
self.annotation :default_type
|
11
|
+
|
9
12
|
self.format = "PMID"
|
10
13
|
|
14
|
+
property :docid => :single do |*args|
|
15
|
+
type = args.first || default_type
|
16
|
+
["PMID", self, type].compact * ":"
|
17
|
+
end
|
18
|
+
|
11
19
|
property :article => :array2single do
|
12
20
|
PubMed.get_article(self).values_at(*self)
|
13
21
|
end
|
@@ -15,25 +23,44 @@ module PMID
|
|
15
23
|
property :abstract => :array2single do
|
16
24
|
article.collect{|a| a.nil? ? nil : a.abstract}
|
17
25
|
end
|
18
|
-
persist :abstract
|
19
26
|
|
20
27
|
property :title => :array2single do
|
21
28
|
article.collect{|a| a.nil? ? nil : a.title}
|
22
29
|
end
|
23
|
-
persist :title
|
24
30
|
|
25
31
|
property :journal => :array2single do
|
26
32
|
article.collect{|a| a.nil? ? nil : a.journal}
|
27
33
|
end
|
28
|
-
persist :journal
|
29
34
|
|
30
35
|
property :year => :array2single do
|
31
36
|
article.collect{|a| a.nil? ? nil : a.year}
|
32
37
|
end
|
33
|
-
persist :year
|
34
38
|
|
35
|
-
property :
|
36
|
-
|
39
|
+
property :cites => :single2array do
|
40
|
+
if title
|
41
|
+
begin
|
42
|
+
GoogleScholar.number_cites(title)
|
43
|
+
rescue
|
44
|
+
nil
|
45
|
+
end
|
46
|
+
else
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
property :_get_text => :array2single do |*args|
|
52
|
+
type = args.first || default_type
|
53
|
+
|
54
|
+
case type.to_s
|
55
|
+
when "full_text", 'fulltext'
|
56
|
+
article.collect{|a| a.nil? ? nil : a.full_text}
|
57
|
+
when "abstract"
|
58
|
+
article.collect{|a| a.nil? ? nil : a.abstract }
|
59
|
+
when "best"
|
60
|
+
article.collect{|a| a.nil? ? nil : (a.full_text || a.text) }
|
61
|
+
else
|
62
|
+
article.collect{|a| a.nil? ? nil : a.text}
|
63
|
+
end
|
37
64
|
end
|
38
65
|
|
39
66
|
property :pubmed_url => :single2array do
|
data/lib/rbbt/entity/protein.rb
CHANGED
@@ -19,7 +19,7 @@ module Protein
|
|
19
19
|
|
20
20
|
def self.ensp2sequence(organism, protein)
|
21
21
|
@@ensp2sequence ||= {}
|
22
|
-
@@ensp2sequence[organism] ||= Organism.protein_sequence(organism).tsv :persist => true
|
22
|
+
@@ensp2sequence[organism] ||= Organism.protein_sequence(organism).tsv :persist => true, :unnamed => true
|
23
23
|
if Array === protein
|
24
24
|
@@ensp2sequence[organism].values_at *protein
|
25
25
|
else
|
@@ -29,7 +29,7 @@ module Protein
|
|
29
29
|
|
30
30
|
def self.ensp2enst(organism, protein)
|
31
31
|
@@ensp2enst ||= {}
|
32
|
-
@@ensp2enst[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Protein ID", :fields => ["Ensembl Transcript ID"], :persist => true)
|
32
|
+
@@ensp2enst[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Protein ID", :fields => ["Ensembl Transcript ID"], :persist => true, :unnamed => true)
|
33
33
|
@@ensp2enst[organism][protein]
|
34
34
|
end
|
35
35
|
|
@@ -51,6 +51,7 @@ module Protein
|
|
51
51
|
persist :transcript
|
52
52
|
|
53
53
|
property :ensembl_protein_image_url => :single2array do
|
54
|
+
organism = self.organism || "Hsa"
|
54
55
|
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
55
56
|
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{ensembl};_rmd=d2a8;export=svg"
|
56
57
|
end
|
@@ -67,13 +68,12 @@ module Protein
|
|
67
68
|
end
|
68
69
|
|
69
70
|
property :gene => :array do
|
70
|
-
Gene.setup(to("Ensembl Protein ID").clean_annotations, "Ensembl Protein ID", organism)
|
71
|
+
Gene.setup(to("Ensembl Protein ID").clean_annotations, "Ensembl Protein ID", organism).ensembl
|
71
72
|
end
|
72
73
|
persist :gene #, :yaml, :file => '/tmp/testes'
|
73
74
|
|
74
75
|
property :pfam => :array2single do
|
75
|
-
index = Organism.gene_pfam(organism).tsv :flat, :persist => true
|
76
|
-
index.unnamed = true
|
76
|
+
index = Organism.gene_pfam(organism).tsv :flat, :persist => true, :unnamed => true
|
77
77
|
pfam = index.values_at(*self).flatten
|
78
78
|
Pfam.setup pfam
|
79
79
|
end
|
@@ -82,12 +82,41 @@ module Protein
|
|
82
82
|
property :sequence => :array2single do
|
83
83
|
Protein.ensp2sequence(organism, self.ensembl)
|
84
84
|
end
|
85
|
-
persist :
|
85
|
+
persist :_ary_sequence
|
86
86
|
|
87
87
|
property :sequence_length => :array2single do
|
88
88
|
sequence.collect{|seq| seq.nil? ? nil : seq.length}
|
89
89
|
end
|
90
|
-
persist :
|
90
|
+
persist :_ary_sequence_length
|
91
|
+
|
92
|
+
property :marked_svg => :single2array do |*args|
|
93
|
+
positions = args.first
|
94
|
+
svg = Open.read(ensembl_protein_image_url)
|
95
|
+
|
96
|
+
seq_len = sequence_length
|
97
|
+
|
98
|
+
doc = Nokogiri::XML(svg)
|
99
|
+
return nil unless doc.css('svg').any?
|
100
|
+
width = doc.css('svg').first.attr('width').to_f
|
101
|
+
height = doc.css('svg').first.attr('height').to_f
|
102
|
+
start = doc.css('rect.ac').first.attr('x').to_f
|
103
|
+
|
104
|
+
positions.each do |position|
|
105
|
+
if width and height and start and seq_len and position
|
106
|
+
offset = (width - start)/seq_len * position + start + rand * 10
|
107
|
+
svg = svg.sub(/<\/svg>/,"<rect x='#{offset}' y='1' width='1' height='#{height}' style='fill:rgb(255,0,0);opacity:0.5;stroke:none;'></svg>")
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
svg
|
112
|
+
end
|
113
|
+
|
114
|
+
property :pdbs => :single do
|
115
|
+
next if uniprot.nil?
|
116
|
+
UniProt.pdbs(uniprot)
|
117
|
+
end
|
118
|
+
persist :pdbs
|
119
|
+
|
91
120
|
|
92
121
|
end
|
93
122
|
|
@@ -10,7 +10,7 @@ module Transcript
|
|
10
10
|
|
11
11
|
def self.enst2ensg(organism, transcript)
|
12
12
|
@@enst2ensg ||= {}
|
13
|
-
@@enst2ensg[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Gene ID"], :persist => true
|
13
|
+
@@enst2ensg[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Gene ID"], :persist => true, :unnamed => true)
|
14
14
|
res = if Array === transcript
|
15
15
|
@@enst2ensg[organism].values_at *transcript
|
16
16
|
else
|
@@ -22,7 +22,7 @@ module Transcript
|
|
22
22
|
|
23
23
|
def self.enst2ensp(organism, transcript)
|
24
24
|
@@enst2ensp ||= {}
|
25
|
-
@@enst2ensp[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Protein ID"], :persist => true)
|
25
|
+
@@enst2ensp[organism] ||= Organism.transcripts(organism).tsv(:type => :single, :key_field => "Ensembl Transcript ID", :fields => ["Ensembl Protein ID"], :persist => true, :unnamed => true)
|
26
26
|
res = if Array === transcript
|
27
27
|
@@enst2ensp[organism].values_at *transcript
|
28
28
|
else
|
@@ -31,6 +31,18 @@ module Transcript
|
|
31
31
|
Protein.setup(res, "Ensembl Protein ID", organism)
|
32
32
|
end
|
33
33
|
|
34
|
+
def self.enst2ense(organism, transcript)
|
35
|
+
@@enst2ense ||= {}
|
36
|
+
@@enst2ense[organism] ||= Organism.transcript_exons(organism).tsv(:persist => true, :fields => "Ensembl Exon ID", :unnamed => true)
|
37
|
+
res = if Array === transcript
|
38
|
+
@@enst2ense[organism].chunked_values_at transcript
|
39
|
+
else
|
40
|
+
@@enst2ense[organism][transcript]
|
41
|
+
end
|
42
|
+
res
|
43
|
+
end
|
44
|
+
|
45
|
+
|
34
46
|
|
35
47
|
property :to! => :array2single do |new_format|
|
36
48
|
return self if format == new_format
|
@@ -44,13 +56,17 @@ module Transcript
|
|
44
56
|
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
45
57
|
end
|
46
58
|
|
59
|
+
property :exons => :array2single do
|
60
|
+
Transcript.enst2ense(organism, self)
|
61
|
+
end
|
62
|
+
persist :_ary_exons
|
63
|
+
|
47
64
|
property :ensembl => :array2single do
|
48
65
|
to "Ensembl Transcript ID"
|
49
66
|
end
|
50
67
|
|
51
68
|
property :sequence => :array2single do
|
52
|
-
transcript_sequence = Organism.transcript_sequence(organism).tsv :persist => true
|
53
|
-
transcript_sequence.unnamed = true
|
69
|
+
transcript_sequence = Organism.transcript_sequence(organism).tsv :persist => true, :unnamed => true
|
54
70
|
transcript_sequence.values_at *self.ensembl
|
55
71
|
end
|
56
72
|
|