rbbt-entities 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/entity.rb +36 -29
- data/lib/rbbt/entity/gene.rb +141 -65
- data/lib/rbbt/entity/genomic_mutation.rb +138 -41
- data/lib/rbbt/entity/genotype.rb +41 -23
- data/lib/rbbt/entity/misc.rb +0 -21
- data/lib/rbbt/entity/mutated_isoform.rb +143 -86
- data/lib/rbbt/entity/pmid.rb +13 -3
- data/lib/rbbt/entity/protein.rb +39 -7
- data/lib/rbbt/entity/transcript.rb +69 -0
- data/test/rbbt/entity/test_gene.rb +1 -1
- data/test/rbbt/entity/test_genomic_mutation.rb +0 -10
- data/test/rbbt/test_entity.rb +101 -0
- metadata +5 -4
data/lib/rbbt/entity.rb
CHANGED
@@ -12,13 +12,14 @@ module Entity
|
|
12
12
|
Entity.formats[base.to_s] = base
|
13
13
|
base.module_eval do
|
14
14
|
class << self
|
15
|
-
attr_accessor :template
|
15
|
+
attr_accessor :template, :list_template, :action_template, :list_action_template
|
16
16
|
alias prev_entity_extended extended
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.extended(data)
|
20
20
|
prev_entity_extended(data)
|
21
|
-
data.extend AnnotatedArray
|
21
|
+
data.extend AnnotatedArray if Array === data
|
22
|
+
data
|
22
23
|
end
|
23
24
|
|
24
25
|
def self.format=(formats)
|
@@ -37,6 +38,14 @@ module Entity
|
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
41
|
+
def to_yaml(*args)
|
42
|
+
clean_annotations.to_yaml(*args)
|
43
|
+
end
|
44
|
+
|
45
|
+
def marshal_dump
|
46
|
+
clean_annotations
|
47
|
+
end
|
48
|
+
|
40
49
|
def consolidate
|
41
50
|
self.inject(nil){|acc,e|
|
42
51
|
if acc.nil?
|
@@ -54,7 +63,7 @@ module Entity
|
|
54
63
|
when (Hash === name and name.size == 1)
|
55
64
|
name, type = name.collect.first
|
56
65
|
when (String === name or Symbol === name)
|
57
|
-
type = :
|
66
|
+
type = :single
|
58
67
|
else
|
59
68
|
raise "Format of name ( => type) not understood: #{name.inspect}"
|
60
69
|
end
|
@@ -62,61 +71,59 @@ module Entity
|
|
62
71
|
name = name.to_s unless String === name
|
63
72
|
|
64
73
|
case type
|
65
|
-
when :
|
66
|
-
self.module_eval do define_method name, &block end
|
67
|
-
when :array
|
68
|
-
self.module_eval do
|
69
|
-
ary_name = "_ary_" << name
|
70
|
-
define_method ary_name, &block
|
71
|
-
define_method name do |*args|
|
72
|
-
raise "Method #{ name } only defined for array" unless Array === self
|
73
|
-
self.send(ary_name, *args)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
when :single
|
77
|
-
self.module_eval do
|
78
|
-
single_name = "_single_" << name
|
79
|
-
define_method single_name, &block
|
80
|
-
define_method name do |*args|
|
81
|
-
raise "Method #{ name } not defined for array" if Array === self
|
82
|
-
self.send(single_name, *args)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
when :single2array
|
74
|
+
when :single, :single2array
|
86
75
|
self.module_eval do
|
87
76
|
single_name = "_single_" << name
|
88
77
|
define_method single_name, &block
|
89
78
|
define_method name do |*args|
|
90
79
|
if Array === self
|
91
|
-
collect{|e| e.send(
|
80
|
+
self.collect{|e| e.send(name, *args)}
|
92
81
|
else
|
93
82
|
self.send(single_name, *args)
|
94
83
|
end
|
95
84
|
end
|
96
85
|
end
|
97
|
-
when :array2single
|
86
|
+
when :array, :array2single
|
98
87
|
self.module_eval do
|
99
88
|
ary_name = "_ary_" << name
|
100
89
|
define_method ary_name, &block
|
90
|
+
|
101
91
|
define_method name do |*args|
|
102
92
|
case
|
103
93
|
when Array === self
|
104
94
|
self.send(ary_name, *args)
|
105
95
|
when (Array === self.container and self.container.respond_to? ary_name)
|
106
|
-
res = self.container.send(
|
96
|
+
res = self.container.send(name, *args)
|
107
97
|
if Hash === res
|
108
98
|
res[self]
|
109
99
|
else
|
110
|
-
|
111
|
-
res[pos]
|
100
|
+
res[self.container_index]
|
112
101
|
end
|
113
102
|
else
|
114
103
|
res = self.make_list.send(ary_name, *args)
|
115
104
|
Hash === res ? res[self] : res[0]
|
116
105
|
end
|
117
106
|
end
|
107
|
+
|
118
108
|
end
|
119
109
|
end
|
120
110
|
end
|
111
|
+
|
112
|
+
UNPERSISTED_PREFIX = "entity_unpersisted_property_"
|
113
|
+
def persist(method_name, type = nil, options = {})
|
114
|
+
type = :memory if type.nil?
|
115
|
+
|
116
|
+
self.module_eval do
|
117
|
+
orig_name = UNPERSISTED_PREFIX + method_name.to_s
|
118
|
+
alias_method orig_name, method_name unless instance_methods.include? orig_name
|
119
|
+
|
120
|
+
define_method method_name do |*args|
|
121
|
+
Persist.persist(__method__.to_s, type, options.merge(:other => {:args => args, :id => self.id})) do
|
122
|
+
self.send(orig_name, *args)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
121
128
|
end
|
122
129
|
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -2,103 +2,146 @@ require 'rbbt/entity'
|
|
2
2
|
require 'rbbt/workflow'
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/sources/entrez'
|
5
|
+
require 'rbbt/sources/matador'
|
6
|
+
require 'rbbt/sources/cancer'
|
5
7
|
require 'rbbt/entity/protein'
|
6
8
|
require 'rbbt/entity/pmid'
|
9
|
+
require 'rbbt/entity/transcript'
|
7
10
|
|
8
11
|
Workflow.require_workflow "Translation"
|
9
12
|
|
10
13
|
module Gene
|
11
14
|
extend Entity
|
12
15
|
|
16
|
+
def self.ensg2enst(organism, gene)
|
17
|
+
@@ensg2enst ||= {}
|
18
|
+
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true).tap{|o| o.unnamed = true}
|
19
|
+
|
20
|
+
if Array === gene
|
21
|
+
@@ensg2enst[organism].values_at *gene
|
22
|
+
else
|
23
|
+
@@ensg2enst[organism][gene]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
def self.filter(query, field = nil, options = nil, entity = nil)
|
29
|
+
return true if query == entity
|
30
|
+
|
31
|
+
return true if query == Gene.setup(entity.dup, options.merge(:format => field)).name
|
32
|
+
|
33
|
+
false
|
34
|
+
end
|
35
|
+
|
13
36
|
self.annotation :format
|
14
37
|
self.annotation :organism
|
15
38
|
|
16
39
|
self.format = Organism::Hsa.identifiers.all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
17
40
|
|
41
|
+
property :ortholog => :array2single do |other|
|
42
|
+
return self if organism =~ /^#{ other }(?!\w)/
|
43
|
+
new_organism = organism.split(":")
|
44
|
+
new_organism[0] = other
|
45
|
+
new_organism = new_organism * "/"
|
46
|
+
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
47
|
+
end
|
48
|
+
persist :ortholog
|
49
|
+
|
18
50
|
property :to! => :array2single do |new_format|
|
19
51
|
return self if format == new_format
|
20
52
|
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
21
53
|
end
|
54
|
+
persist :to!
|
22
55
|
|
23
56
|
property :to => :array2single do |new_format|
|
24
57
|
return self if format == new_format
|
25
|
-
to!(new_format).collect!{|v| v
|
58
|
+
to!(new_format).collect!{|v| Array === v ? v.first : v}
|
26
59
|
end
|
60
|
+
persist :to
|
61
|
+
|
62
|
+
property :strand => :array2single do
|
63
|
+
Organism.gene_positions(organism).tsv(:fields => ["Strand"], :type => :single, :persist => true).values_at *self
|
64
|
+
end
|
65
|
+
persist :strand
|
27
66
|
|
28
67
|
property :ensembl => :array2single do
|
29
|
-
|
68
|
+
to "Ensembl Gene ID"
|
30
69
|
end
|
70
|
+
persist :ensembl
|
31
71
|
|
32
72
|
property :entrez => :array2single do
|
33
|
-
|
73
|
+
to "Entrez Gene ID"
|
34
74
|
end
|
75
|
+
persist :entrez
|
35
76
|
|
77
|
+
property :uniprot => :array2single do
|
78
|
+
to "UniProt/SwissProt Accession"
|
79
|
+
end
|
80
|
+
persist :uniprot
|
36
81
|
|
37
82
|
property :name => :array2single do
|
38
|
-
|
83
|
+
to "Associated Gene Name"
|
39
84
|
end
|
85
|
+
persist :name
|
40
86
|
|
41
87
|
property :chr_start => :array2single do
|
42
|
-
|
43
|
-
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
44
|
-
end
|
88
|
+
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
45
89
|
end
|
90
|
+
persist :chr_start
|
46
91
|
|
47
92
|
property :go_bp_terms => :array2single do
|
48
|
-
|
93
|
+
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
49
94
|
end
|
95
|
+
persist :go_bp_terms
|
50
96
|
|
51
97
|
property :long_name => :single2array do
|
52
98
|
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
53
99
|
gene.nil? ? nil : gene.description.flatten.first
|
54
100
|
end
|
101
|
+
persist :long_name
|
55
102
|
|
56
103
|
property :description => :single2array do
|
57
104
|
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
58
105
|
gene.nil? ? nil : gene.summary.flatten.first
|
59
106
|
end
|
107
|
+
persist :description
|
60
108
|
|
61
109
|
property :transcripts => :array2single do
|
62
|
-
|
63
|
-
|
64
|
-
res = gene_transcripts.values_at(*self.ensembl)
|
65
|
-
res.each{|l| Transcript.setup(l, "Ensembl Transcript ID", organism)}
|
110
|
+
res = Gene.ensg2enst(organism, self.ensembl)
|
111
|
+
Transcript.setup(res, "Ensembl Transcript ID", organism)
|
66
112
|
res
|
67
113
|
end
|
114
|
+
persist :transcripts
|
68
115
|
|
69
116
|
property :proteins => :array2single do
|
70
|
-
|
71
|
-
transcripts = self.transcripts
|
72
|
-
all_transcripts = Transcript.setup(transcripts.flatten, "Ensembl Transcript ID", organism)
|
73
|
-
transcript2protein = nil
|
117
|
+
transcripts = Gene.ensg2enst(organism, self.ensembl)
|
74
118
|
|
75
|
-
|
76
|
-
list.protein
|
77
|
-
}
|
119
|
+
all_transcripts = Transcript.setup(transcripts.flatten.compact.uniq, "Ensembl Transcript ID", organism)
|
78
120
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
121
|
+
transcript2protein = Misc.process_to_hash(all_transcripts){|list|
|
122
|
+
list.protein
|
123
|
+
}
|
124
|
+
|
125
|
+
res = transcripts.collect{|list|
|
126
|
+
Protein.setup(transcript2protein.values_at(*list).compact.uniq, "Ensembl Protein ID", organism)
|
127
|
+
}
|
83
128
|
|
84
|
-
|
85
|
-
}
|
86
|
-
res
|
87
|
-
end
|
129
|
+
Protein.setup(res, "Ensembl Protein ID", organism)
|
88
130
|
end
|
131
|
+
persist :proteins
|
89
132
|
|
90
133
|
property :max_transcript_length => :array2single do
|
91
134
|
transcripts.collect{|list| list.sequence_length.compact.max}
|
92
135
|
end
|
136
|
+
persist :max_transcript_length
|
93
137
|
|
94
138
|
property :max_protein_length => :array2single do
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
proteins.collect{|list| lengths.values_at(*list).compact.max}
|
100
|
-
end
|
139
|
+
proteins = self.proteins
|
140
|
+
all_proteins = Protein.setup(proteins.flatten, "Ensembl Protein ID", organism)
|
141
|
+
lengths = Misc.process_to_hash(all_proteins){|list| list.sequence_length}
|
142
|
+
proteins.collect{|list| lengths.values_at(*list).compact.max}
|
101
143
|
end
|
144
|
+
persist :max_protein_length
|
102
145
|
|
103
146
|
property :chromosome => :array2single do
|
104
147
|
chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
|
@@ -111,6 +154,7 @@ module Gene
|
|
111
154
|
chr[to("Ensembl Gene ID")]
|
112
155
|
end
|
113
156
|
end
|
157
|
+
persist :chromosome
|
114
158
|
|
115
159
|
property :range => :array2single do
|
116
160
|
pos = Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i
|
@@ -119,56 +163,88 @@ module Gene
|
|
119
163
|
Range.new *pos[gene]
|
120
164
|
end
|
121
165
|
end
|
166
|
+
persist :range
|
122
167
|
|
123
168
|
property :articles => :array2single do
|
124
|
-
|
125
|
-
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat).values_at *self.entrez)
|
126
|
-
end
|
169
|
+
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat, :unnamed => true).values_at *self.entrez)
|
127
170
|
end
|
128
|
-
|
171
|
+
persist :articles
|
129
172
|
|
130
|
-
|
131
|
-
|
173
|
+
property :sequence => :array2single do
|
174
|
+
Organism.gene_sequence(organism).tsv :persist => true
|
175
|
+
@gene_sequence.unnamed = true
|
176
|
+
@gene_sequence.values_at *self.ensembl
|
177
|
+
end
|
178
|
+
persist :sequence
|
132
179
|
|
133
|
-
|
134
|
-
|
180
|
+
property :matador_drugs => :array2single do
|
181
|
+
@@matador ||= Matador.protein_drug.tsv(:persist => false).tap{|o| o.unnamed = true}
|
135
182
|
|
136
|
-
|
183
|
+
ensg = self._to("Ensembl Gene ID")
|
137
184
|
|
138
|
-
|
139
|
-
return self if format == new_format
|
140
|
-
Gene.setup(Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
141
|
-
end
|
185
|
+
transcripts = Gene.ensg2enst(organism, ensg)
|
142
186
|
|
143
|
-
|
144
|
-
|
145
|
-
|
187
|
+
t2ps = Misc.process_to_hash(transcripts.compact.flatten.uniq){|l| Transcript.enst2ensp(organism, l).flatten.compact.uniq}
|
188
|
+
|
189
|
+
all_proteins = t2ps.values.flatten.compact
|
190
|
+
|
191
|
+
chemical_pos = @@matador.identify_field "Chemical"
|
192
|
+
|
193
|
+
p2ds = Misc.process_to_hash(all_proteins){|proteins|
|
194
|
+
@@matador.values_at(*proteins).collect{|values|
|
195
|
+
next if values.nil?
|
196
|
+
values[chemical_pos]
|
197
|
+
}
|
198
|
+
}
|
199
|
+
|
200
|
+
res = transcripts.collect do |ts|
|
201
|
+
ps = t2ps.values_at(*ts).compact.flatten
|
202
|
+
p2ds.values_at(*ps).flatten.compact.uniq
|
203
|
+
end
|
204
|
+
|
205
|
+
res
|
146
206
|
end
|
207
|
+
persist :matador_drugs
|
147
208
|
|
148
|
-
|
149
|
-
|
209
|
+
property :drugs => :array2single do
|
210
|
+
@matador_drugs = matador_drugs
|
150
211
|
end
|
212
|
+
persist :drugs
|
151
213
|
|
214
|
+
property :kegg_pathway_drugs => :array2single do
|
215
|
+
self.collect{|gene|
|
216
|
+
pathway_genes = gene.kegg_pathways
|
217
|
+
next if pathway_genes.nil?
|
218
|
+
pathway_genes = pathway_genes.compact.flatten.genes.flatten
|
219
|
+
Gene.setup(pathway_genes, "KEGG Gene ID", organism)
|
152
220
|
|
153
|
-
|
154
|
-
|
155
|
-
transcript_sequence.unnamed = true
|
156
|
-
transcript_sequence.values_at *self.ensembl
|
221
|
+
pathway_genes.compact.drugs.compact.flatten.uniq
|
222
|
+
}
|
157
223
|
end
|
224
|
+
persist :kegg_pathway_drugs
|
158
225
|
|
159
|
-
property :
|
160
|
-
|
161
|
-
s.nil? ? nil : s.length
|
162
|
-
}
|
226
|
+
property :pathway_drugs => :array2single do
|
227
|
+
kegg_pathway_drugs
|
163
228
|
end
|
229
|
+
persist :pathway_drugs
|
164
230
|
|
165
|
-
property :
|
166
|
-
|
167
|
-
|
231
|
+
property :related_cancers => :array2single do
|
232
|
+
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")}
|
233
|
+
end
|
234
|
+
persist :related_cancers
|
168
235
|
|
169
|
-
|
170
|
-
|
171
|
-
|
236
|
+
property :somatic_snvs => :array2single do
|
237
|
+
names = self.name
|
238
|
+
raise "No organism defined" if self.organism.nil?
|
239
|
+
clean_organism = self.organism.sub(/\/.*/,'') + '/jun2011'
|
240
|
+
names.organism = clean_organism
|
241
|
+
ranges = names.chromosome.zip(name.range).collect do |chromosome, range|
|
242
|
+
[chromosome, range.begin, range.end] * ":"
|
243
|
+
end
|
244
|
+
Sequence.job(:somatic_snvs_at_genomic_ranges, File.join("Gene", (names.compact.sort * ", ")[0..80]), :organism => clean_organism, :ranges => ranges).fork.join.load.values_at *ranges
|
172
245
|
end
|
246
|
+
persist :somatic_snvs
|
247
|
+
|
173
248
|
end
|
174
249
|
|
250
|
+
|
@@ -16,14 +16,109 @@ module GenomicMutation
|
|
16
16
|
|
17
17
|
self.format = "Genomic Mutation"
|
18
18
|
|
19
|
-
property :
|
20
|
-
|
19
|
+
property :guess_watson => :array do
|
20
|
+
if Array === self
|
21
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
|
22
|
+
else
|
23
|
+
@watson = Sequence.job(:is_watson, jobname, :mutations => [self.clean_annotations], :organism => organism).run
|
24
|
+
end
|
21
25
|
end
|
26
|
+
persist :guess_watson
|
27
|
+
|
28
|
+
def watson
|
29
|
+
if @watson.nil?
|
30
|
+
@watson = :missing
|
31
|
+
@watson = guess_watson
|
32
|
+
end
|
33
|
+
@watson
|
34
|
+
end
|
35
|
+
|
36
|
+
def orig_watson
|
37
|
+
@watson
|
38
|
+
end
|
39
|
+
|
40
|
+
property :ensembl_browser => :single2array do
|
41
|
+
"http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{position - 100}-#{position + 100}"
|
42
|
+
end
|
43
|
+
persist :ensembl_browser
|
44
|
+
|
45
|
+
property :chromosome => :array2single do
|
46
|
+
self.clean_annotations.collect{|mut| mut.split(":")[0]}
|
47
|
+
end
|
48
|
+
persist :chromosome
|
49
|
+
|
50
|
+
property :position => :array2single do
|
51
|
+
self.clean_annotations.collect{|mut| mut.split(":")[1].to_i}
|
52
|
+
end
|
53
|
+
persist :position
|
54
|
+
|
55
|
+
property :base => :array2single do
|
56
|
+
self.clean_annotations.collect{|mut| mut.split(":")[2]}
|
57
|
+
end
|
58
|
+
persist :base
|
59
|
+
|
60
|
+
property :reference => :array2single do
|
61
|
+
Sequence.reference_allele_at_chr_positions(organism, chromosome, position)
|
62
|
+
end
|
63
|
+
persist :reference
|
22
64
|
|
65
|
+
property :score => :array2single do
|
66
|
+
self.clean_annotations.collect{|mut| mut.split(":")[3].to_f}
|
67
|
+
end
|
68
|
+
persist :score
|
69
|
+
|
70
|
+
property :remove_score => :array2single do
|
71
|
+
self.annotate(self.collect{|mut| mut.split(":")[0..2] * ":"})
|
72
|
+
end
|
73
|
+
persist :remove_score
|
23
74
|
|
24
|
-
property :
|
25
|
-
self.split(":")[
|
75
|
+
property :noscore => :single2array do
|
76
|
+
self.annotate self.clean_annotations.collect{|mut| mut.split(":")[0..2]}
|
77
|
+
end
|
78
|
+
persist :noscore
|
79
|
+
|
80
|
+
property :to_watson => :array2single do
|
81
|
+
if watson
|
82
|
+
self
|
83
|
+
else
|
84
|
+
result = Sequence.job(:to_watson, jobname, :mutations => self.clean_annotations, :organism => organism).run
|
85
|
+
self.annotate(result)
|
86
|
+
result
|
87
|
+
end
|
88
|
+
end
|
89
|
+
persist :to_watson
|
90
|
+
|
91
|
+
property :reference => :array2single do
|
92
|
+
Sequence.job(:reference_allele_at_genomic_positions, jobname, :positions => self.clean_annotations, :organism => organism).run.values_at *self
|
93
|
+
end
|
94
|
+
persist :reference
|
95
|
+
|
96
|
+
property :type => :array2single do
|
97
|
+
self.base.zip(reference).collect do |base,reference|
|
98
|
+
type = case
|
99
|
+
when base == reference
|
100
|
+
"none"
|
101
|
+
when (base.nil? or reference.nil? or base == "?" or reference == "?")
|
102
|
+
"unknown"
|
103
|
+
when (base.length > 1 or base == '-')
|
104
|
+
"indel"
|
105
|
+
when (not %w(A G T C).include? base and not %w(A G T C).include? reference)
|
106
|
+
nil
|
107
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
108
|
+
"transition"
|
109
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
110
|
+
"transition"
|
111
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["A", "G"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["A", "G"]).any?)
|
112
|
+
"transversion"
|
113
|
+
when (((Misc::IUPAC2BASE[base] || []) & ["T", "C"]).any? and not ((Misc::IUPAC2BASE[reference] || []) & ["T", "C"]).any?)
|
114
|
+
"transversion"
|
115
|
+
else
|
116
|
+
"unknown [#{[base, reference] * " - "}]"
|
117
|
+
end
|
118
|
+
type
|
119
|
+
end
|
26
120
|
end
|
121
|
+
persist :type
|
27
122
|
|
28
123
|
property :offset_in_genes => :array2single do
|
29
124
|
gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
|
@@ -34,60 +129,62 @@ module GenomicMutation
|
|
34
129
|
}.compact
|
35
130
|
}
|
36
131
|
end
|
132
|
+
persist :offset_in_genes
|
37
133
|
|
38
134
|
property :genes => :array2single do
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
Gene.setup(genes, "Ensembl Gene ID", organism)
|
44
|
-
end
|
135
|
+
genes = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run
|
136
|
+
genes.unnamed = true
|
137
|
+
genes = genes.values_at *self
|
138
|
+
Gene.setup(genes, "Ensembl Gene ID", organism)
|
45
139
|
end
|
140
|
+
persist :genes
|
46
141
|
|
47
142
|
property :mutated_isoforms => :array2single do
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
end
|
143
|
+
res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self.clean_annotations).run.values_at *self
|
144
|
+
res.each{|list| list.organism = organism unless list.nil?}
|
145
|
+
res[0].annotate res if res[0].respond_to? :annotate
|
146
|
+
res
|
53
147
|
end
|
148
|
+
persist :mutated_isoforms
|
54
149
|
|
55
|
-
property :exon_junctions do
|
56
|
-
|
150
|
+
property :exon_junctions => :array do
|
151
|
+
Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at(*self)
|
57
152
|
end
|
153
|
+
persist :exon_junctions
|
58
154
|
|
59
155
|
property :in_exon_junction? => :array2single do
|
60
156
|
exon_junctions.collect{|l| not l.nil? and not l.empty?}
|
61
157
|
end
|
158
|
+
persist :in_exon_junction?
|
62
159
|
|
63
160
|
property :over_gene? => :array2single do |gene|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
@truncated ||= begin
|
80
|
-
mutated_isoforms = self.mutated_isoforms
|
81
|
-
all_mutated_isoforms = MutatedIsoform.setup(mutated_isoforms.flatten.compact, organism)
|
82
|
-
mutated_isoform2truncated = Misc.process_to_hash(all_mutated_isoforms){|list| all_mutated_isoforms.truncated}
|
83
|
-
mutated_isoforms.collect{|list| list.nil? ? [] : mutated_isoform2truncated.values_at(*list)}
|
84
|
-
end
|
161
|
+
if Gene === gene
|
162
|
+
range = gene.range
|
163
|
+
chromosome = gene.chromosome
|
164
|
+
else
|
165
|
+
range = Gene.setup(gene.dup, "Ensembl Gene ID", organism).range
|
166
|
+
chromosome = Gene.setup(gene.dup, "Ensembl Gene ID", organism).chromosome
|
167
|
+
end
|
168
|
+
|
169
|
+
if range.nil?
|
170
|
+
[false] * self.length
|
171
|
+
else
|
172
|
+
chromosome.zip(position).collect{|chr,pos| chr == chromosome and range.include? pos}
|
173
|
+
end
|
174
|
+
|
175
|
+
#genes.clean_annotations.collect{|list| list.include? gene}
|
85
176
|
end
|
86
177
|
|
87
178
|
property :affected_exons => :array2single do
|
88
|
-
|
89
|
-
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at *self
|
90
|
-
end
|
179
|
+
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self.clean_annotations).run.values_at *self
|
91
180
|
end
|
181
|
+
persist :affected_exons
|
92
182
|
|
183
|
+
property :damaging? => :array2single do |*args|
|
184
|
+
exon_junctions.zip(mutated_isoforms).collect do |exs, mis|
|
185
|
+
(Array === exs and exs.any?) or
|
186
|
+
(Array === mis and mis.select{|mi| mi.damaged?(*args)}.any?)
|
187
|
+
end
|
188
|
+
end
|
189
|
+
persist :damaging?
|
93
190
|
end
|