rbbt-entities 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/entity.rb +36 -29
- data/lib/rbbt/entity/gene.rb +141 -65
- data/lib/rbbt/entity/genomic_mutation.rb +138 -41
- data/lib/rbbt/entity/genotype.rb +41 -23
- data/lib/rbbt/entity/misc.rb +0 -21
- data/lib/rbbt/entity/mutated_isoform.rb +143 -86
- data/lib/rbbt/entity/pmid.rb +13 -3
- data/lib/rbbt/entity/protein.rb +39 -7
- data/lib/rbbt/entity/transcript.rb +69 -0
- data/test/rbbt/entity/test_gene.rb +1 -1
- data/test/rbbt/entity/test_genomic_mutation.rb +0 -10
- data/test/rbbt/test_entity.rb +101 -0
- metadata +5 -4
data/lib/rbbt/entity.rb
CHANGED
@@ -12,13 +12,14 @@ module Entity
|
|
12
12
|
Entity.formats[base.to_s] = base
|
13
13
|
base.module_eval do
|
14
14
|
class << self
|
15
|
-
attr_accessor :template
|
15
|
+
attr_accessor :template, :list_template, :action_template, :list_action_template
|
16
16
|
alias prev_entity_extended extended
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.extended(data)
|
20
20
|
prev_entity_extended(data)
|
21
|
-
data.extend AnnotatedArray
|
21
|
+
data.extend AnnotatedArray if Array === data
|
22
|
+
data
|
22
23
|
end
|
23
24
|
|
24
25
|
def self.format=(formats)
|
@@ -37,6 +38,14 @@ module Entity
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
41
|
+
def to_yaml(*args)
|
42
|
+
clean_annotations.to_yaml(*args)
|
43
|
+
end
|
44
|
+
|
45
|
+
def marshal_dump
|
46
|
+
clean_annotations
|
47
|
+
end
|
48
|
+
|
40
49
|
def consolidate
|
41
50
|
self.inject(nil){|acc,e|
|
42
51
|
if acc.nil?
|
@@ -54,7 +63,7 @@ module Entity
|
|
54
63
|
when (Hash === name and name.size == 1)
|
55
64
|
name, type = name.collect.first
|
56
65
|
when (String === name or Symbol === name)
|
57
|
-
type = :
|
66
|
+
type = :single
|
58
67
|
else
|
59
68
|
raise "Format of name ( => type) not understood: #{name.inspect}"
|
60
69
|
end
|
@@ -62,61 +71,59 @@ module Entity
|
|
62
71
|
name = name.to_s unless String === name
|
63
72
|
|
64
73
|
case type
|
65
|
-
when :
|
66
|
-
self.module_eval do define_method name, &block end
|
67
|
-
when :array
|
68
|
-
self.module_eval do
|
69
|
-
ary_name = "_ary_" << name
|
70
|
-
define_method ary_name, &block
|
71
|
-
define_method name do |*args|
|
72
|
-
raise "Method #{ name } only defined for array" unless Array === self
|
73
|
-
self.send(ary_name, *args)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
when :single
|
77
|
-
self.module_eval do
|
78
|
-
single_name = "_single_" << name
|
79
|
-
define_method single_name, &block
|
80
|
-
define_method name do |*args|
|
81
|
-
raise "Method #{ name } not defined for array" if Array === self
|
82
|
-
self.send(single_name, *args)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
when :single2array
|
74
|
+
when :single, :single2array
|
86
75
|
self.module_eval do
|
87
76
|
single_name = "_single_" << name
|
88
77
|
define_method single_name, &block
|
89
78
|
define_method name do |*args|
|
90
79
|
if Array === self
|
91
|
-
collect{|e| e.send(
|
80
|
+
self.collect{|e| e.send(name, *args)}
|
92
81
|
else
|
93
82
|
self.send(single_name, *args)
|
94
83
|
end
|
95
84
|
end
|
96
85
|
end
|
97
|
-
when :array2single
|
86
|
+
when :array, :array2single
|
98
87
|
self.module_eval do
|
99
88
|
ary_name = "_ary_" << name
|
100
89
|
define_method ary_name, &block
|
90
|
+
|
101
91
|
define_method name do |*args|
|
102
92
|
case
|
103
93
|
when Array === self
|
104
94
|
self.send(ary_name, *args)
|
105
95
|
when (Array === self.container and self.container.respond_to? ary_name)
|
106
|
-
res = self.container.send(
|
96
|
+
res = self.container.send(name, *args)
|
107
97
|
if Hash === res
|
108
98
|
res[self]
|
109
99
|
else
|
110
|
-
|
111
|
-
res[pos]
|
100
|
+
res[self.container_index]
|
112
101
|
end
|
113
102
|
else
|
114
103
|
res = self.make_list.send(ary_name, *args)
|
115
104
|
Hash === res ? res[self] : res[0]
|
116
105
|
end
|
117
106
|
end
|
107
|
+
|
118
108
|
end
|
119
109
|
end
|
120
110
|
end
|
111
|
+
|
112
|
+
UNPERSISTED_PREFIX = "entity_unpersisted_property_"
|
113
|
+
def persist(method_name, type = nil, options = {})
|
114
|
+
type = :memory if type.nil?
|
115
|
+
|
116
|
+
self.module_eval do
|
117
|
+
orig_name = UNPERSISTED_PREFIX + method_name.to_s
|
118
|
+
alias_method orig_name, method_name unless instance_methods.include? orig_name
|
119
|
+
|
120
|
+
define_method method_name do |*args|
|
121
|
+
Persist.persist(__method__.to_s, type, options.merge(:other => {:args => args, :id => self.id})) do
|
122
|
+
self.send(orig_name, *args)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
121
128
|
end
|
122
129
|
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -2,103 +2,146 @@ require 'rbbt/entity'
|
|
2
2
|
require 'rbbt/workflow'
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/sources/entrez'
|
5
|
+
require 'rbbt/sources/matador'
|
6
|
+
require 'rbbt/sources/cancer'
|
5
7
|
require 'rbbt/entity/protein'
|
6
8
|
require 'rbbt/entity/pmid'
|
9
|
+
require 'rbbt/entity/transcript'
|
7
10
|
|
8
11
|
Workflow.require_workflow "Translation"
|
9
12
|
|
10
13
|
module Gene
|
11
14
|
extend Entity
|
12
15
|
|
16
|
+
def self.ensg2enst(organism, gene)
|
17
|
+
@@ensg2enst ||= {}
|
18
|
+
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true).tap{|o| o.unnamed = true}
|
19
|
+
|
20
|
+
if Array === gene
|
21
|
+
@@ensg2enst[organism].values_at *gene
|
22
|
+
else
|
23
|
+
@@ensg2enst[organism][gene]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
def self.filter(query, field = nil, options = nil, entity = nil)
|
29
|
+
return true if query == entity
|
30
|
+
|
31
|
+
return true if query == Gene.setup(entity.dup, options.merge(:format => field)).name
|
32
|
+
|
33
|
+
false
|
34
|
+
end
|
35
|
+
|
13
36
|
self.annotation :format
|
14
37
|
self.annotation :organism
|
15
38
|
|
16
39
|
self.format = Organism::Hsa.identifiers.all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
17
40
|
|
41
|
+
property :ortholog => :array2single do |other|
|
42
|
+
return self if organism =~ /^#{ other }(?!\w)/
|
43
|
+
new_organism = organism.split(":")
|
44
|
+
new_organism[0] = other
|
45
|
+
new_organism = new_organism * "/"
|
46
|
+
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
47
|
+
end
|
48
|
+
persist :ortholog
|
49
|
+
|
18
50
|
property :to! => :array2single do |new_format|
|
19
51
|
return self if format == new_format
|
20
52
|
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
21
53
|
end
|
54
|
+
persist :to!
|
22
55
|
|
23
56
|
property :to => :array2single do |new_format|
|
24
57
|
return self if format == new_format
|
25
|
-
to!(new_format).collect!{|v| v
|
58
|
+
to!(new_format).collect!{|v| Array === v ? v.first : v}
|
26
59
|
end
|
60
|
+
persist :to
|
61
|
+
|
62
|
+
property :strand => :array2single do
|
63
|
+
Organism.gene_positions(organism).tsv(:fields => ["Strand"], :type => :single, :persist => true).values_at *self
|
64
|
+
end
|
65
|
+
persist :strand
|
27
66
|
|
28
67
|
property :ensembl => :array2single do
|
29
|
-
|
68
|
+
to "Ensembl Gene ID"
|
30
69
|
end
|
70
|
+
persist :ensembl
|
31
71
|
|
32
72
|
property :entrez => :array2single do
|
33
|
-
|
73
|
+
to "Entrez Gene ID"
|
34
74
|
end
|
75
|
+
persist :entrez
|
35
76
|
|
77
|
+
property :uniprot => :array2single do
|
78
|
+
to "UniProt/SwissProt Accession"
|
79
|
+
end
|
80
|
+
persist :uniprot
|
36
81
|
|
37
82
|
property :name => :array2single do
|
38
|
-
|
83
|
+
to "Associated Gene Name"
|
39
84
|
end
|
85
|
+
persist :name
|
40
86
|
|
41
87
|
property :chr_start => :array2single do
|
42
|
-
|
43
|
-
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
44
|
-
end
|
88
|
+
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
45
89
|
end
|
90
|
+
persist :chr_start
|
46
91
|
|
47
92
|
property :go_bp_terms => :array2single do
|
48
|
-
|
93
|
+
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
49
94
|
end
|
95
|
+
persist :go_bp_terms
|
50
96
|
|
51
97
|
property :long_name => :single2array do
|
52
98
|
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
53
99
|
gene.nil? ? nil : gene.description.flatten.first
|
54
100
|
end
|
101
|
+
persist :long_name
|
55
102
|
|
56
103
|
property :description => :single2array do
|
57
104
|
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
58
105
|
gene.nil? ? nil : gene.summary.flatten.first
|
59
106
|
end
|
107
|
+
persist :description
|
60
108
|
|
61
109
|
property :transcripts => :array2single do
|
62
|
-
|
63
|
-
|
64
|
-
res = gene_transcripts.values_at(*self.ensembl)
|
65
|
-
res.each{|l| Transcript.setup(l, "Ensembl Transcript ID", organism)}
|
110
|
+
res = Gene.ensg2enst(organism, self.ensembl)
|
111
|
+
Transcript.setup(res, "Ensembl Transcript ID", organism)
|
66
112
|
res
|
67
113
|
end
|
114
|
+
persist :transcripts
|
68
115
|
|
69
116
|
property :proteins => :array2single do
|
70
|
-
|
71
|
-
transcripts = self.transcripts
|
72
|
-
all_transcripts = Transcript.setup(transcripts.flatten, "Ensembl Transcript ID", organism)
|
73
|
-
transcript2protein = nil
|
117
|
+
transcripts = Gene.ensg2enst(organism, self.ensembl)
|
74
118
|
|
75
|
-
|
76
|
-
list.protein
|
77
|
-
}
|
119
|
+
all_transcripts = Transcript.setup(transcripts.flatten.compact.uniq, "Ensembl Transcript ID", organism)
|
78
120
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
121
|
+
transcript2protein = Misc.process_to_hash(all_transcripts){|list|
|
122
|
+
list.protein
|
123
|
+
}
|
124
|
+
|
125
|
+
res = transcripts.collect{|list|
|
126
|
+
Protein.setup(transcript2protein.values_at(*list).compact.uniq, "Ensembl Protein ID", organism)
|
127
|
+
}
|
83
128
|
|
84
|
-
|
85
|
-
}
|
86
|
-
res
|
87
|
-
end
|
129
|
+
Protein.setup(res, "Ensembl Protein ID", organism)
|
88
130
|
end
|
131
|
+
persist :proteins
|
89
132
|
|
90
133
|
property :max_transcript_length => :array2single do
|
91
134
|
transcripts.collect{|list| list.sequence_length.compact.max}
|
92
135
|
end
|
136
|
+
persist :max_transcript_length
|
93
137
|
|
94
138
|
property :max_protein_length => :array2single do
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
proteins.collect{|list| lengths.values_at(*list).compact.max}
|
100
|
-
end
|
139
|
+
proteins = self.proteins
|
140
|
+
all_proteins = Protein.setup(proteins.flatten, "Ensembl Protein ID", organism)
|
141
|
+
lengths = Misc.process_to_hash(all_proteins){|list| list.sequence_length}
|
142
|
+
proteins.collect{|list| lengths.values_at(*list).compact.max}
|
101
143
|
end
|
144
|
+
persist :max_protein_length
|
102
145
|
|
103
146
|
property :chromosome => :array2single do
|
104
147
|
chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
|
@@ -111,6 +154,7 @@ module Gene
|
|
111
154
|
chr[to("Ensembl Gene ID")]
|
112
155
|
end
|
113
156
|
end
|
157
|
+
persist :chromosome
|
114
158
|
|
115
159
|
property :range => :array2single do
|
116
160
|
pos = Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i
|
@@ -119,56 +163,88 @@ module Gene
|
|
119
163
|
Range.new *pos[gene]
|
120
164
|
end
|
121
165
|
end
|
166
|
+
persist :range
|
122
167
|
|
123
168
|
property :articles => :array2single do
|
124
|
-
|
125
|
-
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat).values_at *self.entrez)
|
126
|
-
end
|
169
|
+
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat, :unnamed => true).values_at *self.entrez)
|
127
170
|
end
|
128
|
-
|
171
|
+
persist :articles
|
129
172
|
|
130
|
-
|
131
|
-
|
173
|
+
property :sequence => :array2single do
|
174
|
+
Organism.gene_sequence(organism).tsv :persist => true
|
175
|
+
@gene_sequence.unnamed = true
|
176
|
+
@gene_sequence.values_at *self.ensembl
|
177
|
+
end
|
178
|
+
persist :sequence
|
132
179
|
|
133
|
-
|
134
|
-
|
180
|
+
property :matador_drugs => :array2single do
|
181
|
+
@@matador ||= Matador.protein_drug.tsv(:persist => false).tap{|o| o.unnamed = true}
|
135
182
|
|
136
|
-
|
183
|
+
ensg = self._to("Ensembl Gene ID")
|
137
184
|
|
138
|
-
|
139
|
-
return self if format == new_format
|
140
|
-
Gene.setup(Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
141
|
-
end
|
185
|
+
transcripts = Gene.ensg2enst(organism, ensg)
|
142
186
|
|
143
|
-
|
144
|
-
|
145
|
-
|
187
|
+
t2ps = Misc.process_to_hash(transcripts.compact.flatten.uniq){|l| Transcript.enst2ensp(organism, l).flatten.compact.uniq}
|
188
|
+
|
189
|
+
all_proteins = t2ps.values.flatten.compact
|
190
|
+
|
191
|
+
chemical_pos = @@matador.identify_field "Chemical"
|
192
|
+
|
193
|
+
p2ds = Misc.process_to_hash(all_proteins){|proteins|
|
194
|
+
@@matador.values_at(*proteins).collect{|values|
|
195
|
+
next if values.nil?
|
196
|
+
values[chemical_pos]
|
197
|
+
}
|
198
|
+
}
|
199
|
+
|
200
|
+
res = transcripts.collect do |ts|
|
201
|
+
ps = t2ps.values_at(*ts).compact.flatten
|
202
|
+
p2ds.values_at(*ps).flatten.compact.uniq
|
203
|
+
end
|
204
|
+
|
205
|
+
res
|
146
206
|
end
|
207
|
+
persist :matador_drugs
|
147
208
|
|
148
|
-
|
149
|
-
|
209
|
+
property :drugs => :array2single do
|
210
|
+
@matador_drugs = matador_drugs
|
150
211
|
end
|
212
|
+
persist :drugs
|
151
213
|
|
214
|
+
property :kegg_pathway_drugs => :array2single do
|
215
|
+
self.collect{|gene|
|
216
|
+
pathway_genes = gene.kegg_pathways
|
217
|
+
next if pathway_genes.nil?
|
218
|
+
pathway_genes = pathway_genes.compact.flatten.genes.flatten
|
219
|
+
Gene.setup(pathway_genes, "KEGG Gene ID", organism)
|
152
220
|
|
153
|
-
|
154
|
-
|
155
|
-
transcript_sequence.unnamed = true
|
156
|
-
transcript_sequence.values_at *self.ensembl
|
221
|
+
pathway_genes.compact.drugs.compact.flatten.uniq
|
222
|
+
}
|
157
223
|
end
|
224
|
+
persist :kegg_pathway_drugs
|
158
225
|
|
159
|
-
property :
|
160
|
-
|
161
|
-
s.nil? ? nil : s.length
|
162
|
-
}
|
226
|
+
property :pathway_drugs => :array2single do
|
227
|
+
kegg_pathway_drugs
|
163
228
|
end
|
229
|
+
persist :pathway_drugs
|
164
230
|
|
165
|
-
property :
|
166
|
-
|
167
|
-
|
231
|
+
property :related_cancers => :array2single do
|
232
|
+
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")}
|
233
|
+
end
|
234
|
+
persist :related_cancers
|
168
235
|
|
169
|
-
|
170
|
-
|
171
|
-
|
236
|
+
property :somatic_snvs => :array2single do
|
237
|
+
names = self.name
|
238
|
+
raise "No organism defined" if self.organism.nil?
|
239
|
+
clean_organism = self.organism.sub(/\/.*/,'') + '/jun2011'
|
240
|
+
names.organism = clean_organism
|
241
|
+
ranges = names.chromosome.zip(name.range).collect do |chromosome, range|
|
242
|
+
[chromosome, range.begin, range.end] * ":"
|
243
|
+
end
|
244
|
+
Sequence.job(:somatic_snvs_at_genomic_ranges, File.join("Gene", (names.compact.sort * ", ")[0..80]), :organism => clean_organism, :ranges => ranges).fork.join.load.values_at *ranges
|
172
245
|
end
|
246
|
+
persist :somatic_snvs
|
247
|
+
|
173
248
|
end
|
174
249
|
|
250
|
+
|
@@ -16,14 +16,109 @@ module GenomicMutation
|
|
16
16
|
|
17
17
|
self.format = "Genomic Mutation"
|
18
18
|
|
19
|
-
property :
|
20
|
-
|
19
|
+
property :guess_watson => :array do
|
20
|
+
if Array === self
|
21
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
|
22
|
+
else
|
23
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => [self.clean_annotations], :organism => organism).run
|
24
|
+
end
|
21
25
|
end
|
26
|
+
persist :guess_watson
|
27
|
+
|
28
|
+
def watson
|
29
|
+
if @watson.nil?
|
30
|
+
@watson = :missing
|
31
|
+
@watson = guess_watson
|
32
|
+
end
|
33
|
+
@watson
|
34
|
+
end
|
35
|
+
|
36
|
+
def orig_watson
|
37
|
+
@watson
|
38
|
+
end
|
39
|
+
|
40
|
+
property :ensembl_browser => :single2array do
|
41
|
+
"http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{position - 100}-#{position + 100}"
|
42
|
+
end
|
43
|
+
persist :ensembl_browser
|
44
|
+
|
45
|
+
property :chromosome => :array2single do
|
46
|
+
self.clean_annotations.collect{|mut| mut.split(":")[0]}
|
47
|
+
end
|
48
|
+
persist :chromosome
|
49
|
+
|
50
|
+
property :position => :array2single do
|
51
|
+
self.clean_annotations.collect{|mut| mut.split(":")[1].to_i}
|
52
|
+
end
|
53
|
+
persist :position
|
54
|
+
|
55
|
+
property :base => :array2single do
|
56
|
+
self.clean_annotations.collect{|mut| mut.split(":")[2]}
|
57
|
+
end
|
58
|
+
persist :base
|
59
|
+
|
60
|
+
property :reference => :array2single do
|
61
|
+
Sequence.reference_allele_at_chr_positions(organism, chromosome, position)
|
62
|
+
end
|
63
|
+
persist :reference
|
22
64
|
|
65
|
+
property :score => :array2single do
|
66
|
+
self.clean_annotations.collect{|mut| mut.split(":")[3].to_f}
|
67
|
+
end
|
68
|
+
persist :score
|
69
|
+
|
70
|
+
property :remove_score => :array2single do
|
71
|
+
self.annotate(self.collect{|mut| mut.split(":")[0..2] * ":"})
|
72
|
+
end
|
73
|
+
persist :remove_score
|
23
74
|
|
24
|
-
property :
|
25
|
-
self.split(":")[
|
75
|
+
property :noscore => :single2array do
|
76
|
+
self.annotate self.clean_annotations.collect{|mut| mut.split(":")[0..2]}
|
77
|
+
end
|
78
|
+
persist :noscore
|
79
|
+
|
80
|
+
property :to_watson => :array2single do
|
81
|
+
if watson
|
82
|
+
self
|
83
|
+
else
|
84
|
+
result = Sequence.job(:to_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
|
85
|
+
self.annotate(result)
|
86
|
+
result
|
87
|
+
end
|
88
|
+
end
|
89
|
+
persist :to_watson
|
90
|
+
|
91
|
+
property :reference => :array2single do
|
92
|
+
Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism).run.values_at *self
|
93
|
+
end
|
94
|
+
persist :reference
|
95
|
+
|
96
|
+
property :type => :array2single do
|
97
|
+
self.base.zip(reference).collect do |base,reference|
|
98
|
+
type = case
|
99
|
+
when base == reference
|
100
|
+
"none"
|
101
|
+
when (base.nil? or reference.nil? or base == "?" or reference == "?")
|
102
|
+
"unknown"
|
103
|
+
when (base.length > 1 or base == '-')
|
104
|
+
"indel"
|
105
|
+
when (not %w(A G T C).include? base and not %w(A G T C).include? reference)
|
106
|
+
nil
|
107
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
108
|
+
"transition"
|
109
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
110
|
+
"transition"
|
111
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
112
|
+
"transversion"
|
113
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
114
|
+
"transversion"
|
115
|
+
else
|
116
|
+
"unknown [#{[base, reference] * " - "}]"
|
117
|
+
end
|
118
|
+
type
|
119
|
+
end
|
26
120
|
end
|
121
|
+
persist :type
|
27
122
|
|
28
123
|
property :offset_in_genes => :array2single do
|
29
124
|
gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
|
@@ -34,60 +129,62 @@ module GenomicMutation
|
|
34
129
|
}.compact
|
35
130
|
}
|
36
131
|
end
|
132
|
+
persist :offset_in_genes
|
37
133
|
|
38
134
|
property :genes => :array2single do
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
Gene.setup(genes, "Ensembl Gene ID", organism)
|
44
|
-
end
|
135
|
+
genes = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run
|
136
|
+
genes.unnamed = true
|
137
|
+
genes = genes.values_at *self
|
138
|
+
Gene.setup(genes, "Ensembl Gene ID", organism)
|
45
139
|
end
|
140
|
+
persist :genes
|
46
141
|
|
47
142
|
property :mutated_isoforms => :array2single do
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
end
|
143
|
+
res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.values_at *self
|
144
|
+
res.each{|list| list.organism = organism unless list.nil?}
|
145
|
+
res[0].annotate res if res[0].respond_to? :annotate
|
146
|
+
res
|
53
147
|
end
|
148
|
+
persist :mutated_isoforms
|
54
149
|
|
55
|
-
property :exon_junctions do
|
56
|
-
|
150
|
+
property :exon_junctions => :array do
|
151
|
+
Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at(*self)
|
57
152
|
end
|
153
|
+
persist :exon_junctions
|
58
154
|
|
59
155
|
property :in_exon_junction? => :array2single do
|
60
156
|
exon_junctions.collect{|l| not l.nil? and not l.empty?}
|
61
157
|
end
|
158
|
+
persist :in_exon_junction?
|
62
159
|
|
63
160
|
property :over_gene? => :array2single do |gene|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
@truncated ||= begin
|
80
|
-
mutated_isoforms = self.mutated_isoforms
|
81
|
-
all_mutated_isoforms = MutatedIsoform.setup(mutated_isoforms.flatten.compact, organism)
|
82
|
-
mutated_isoform2truncated = Misc.process_to_hash(all_mutated_isoforms){|list| all_mutated_isoforms.truncated}
|
83
|
-
mutated_isoforms.collect{|list| list.nil? ? [] : mutated_isoform2truncated.values_at(*list)}
|
84
|
-
end
|
161
|
+
if Gene === gene
|
162
|
+
range = gene.range
|
163
|
+
chromosome = gene.chromosome
|
164
|
+
else
|
165
|
+
range = Gene.setup(gene.dup, "Ensembl Gene ID", organism).range
|
166
|
+
chromosome = Gene.setup(gene.dup, "Ensembl Gene ID", organism).chromosome
|
167
|
+
end
|
168
|
+
|
169
|
+
if range.nil?
|
170
|
+
[false] * self.length
|
171
|
+
else
|
172
|
+
chromosome.zip(position).collect{|chr,pos| chr == chromosome and range.include? pos}
|
173
|
+
end
|
174
|
+
|
175
|
+
#genes.clean_annotations.collect{|list| list.include? gene}
|
85
176
|
end
|
86
177
|
|
87
178
|
property :affected_exons => :array2single do
|
88
|
-
|
89
|
-
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at *self
|
90
|
-
end
|
179
|
+
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at *self
|
91
180
|
end
|
181
|
+
persist :affected_exons
|
92
182
|
|
183
|
+
property :damaging? => :array2single do |*args|
|
184
|
+
exon_junctions.zip(mutated_isoforms).collect do |exs, mis|
|
185
|
+
(Array === exs and exs.any?) or
|
186
|
+
(Array === mis and mis.select{|mi| mi.damaged?(*args)}.any?)
|
187
|
+
end
|
188
|
+
end
|
189
|
+
persist :damaging?
|
93
190
|
end
|