rbbt-entities 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/entity.rb +89 -0
- data/lib/rbbt/entity/cnv.rb +37 -0
- data/lib/rbbt/entity/gene.rb +126 -54
- data/lib/rbbt/entity/genomic_mutation.rb +93 -0
- data/lib/rbbt/entity/genotype.rb +124 -148
- data/lib/rbbt/entity/mutated_isoform.rb +179 -0
- data/lib/rbbt/entity/pmid.rb +19 -0
- data/lib/rbbt/entity/protein.rb +40 -8
- data/test/rbbt/entity/test_gene.rb +40 -0
- data/test/rbbt/entity/test_genomic_mutation.rb +38 -0
- data/test/rbbt/entity/test_mutated_isoform.rb +37 -0
- data/test/rbbt/entity/test_protein.rb +27 -0
- data/test/rbbt/test_entity.rb +0 -0
- metadata +16 -4
data/lib/rbbt/entity.rb
CHANGED
@@ -27,6 +27,95 @@ module Entity
|
|
27
27
|
Entity.formats[format] = self
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
# Returns a plain counterpart of the receiver.
#
# * Array: delegates to +annotated_array_clean_collect+, recursively cleaning
#   every element that itself responds to +clean_annotations+ and passing the
#   other elements through untouched.
# * String: returns a new String with the same content, built by appending
#   the receiver to an empty literal.
# * Anything else: returns nil (no branch matches).
def clean_annotations
  case self
  when Array
    self.annotated_array_clean_collect do |element|
      if element.respond_to?(:clean_annotations)
        element.clean_annotations
      else
        element
      end
    end
  when String
    "" << self
  end
end
|
39
|
+
|
40
|
+
# Concatenates all the elements of the receiver (e.g. a list of lists) into a
# single object and returns it; returns nil for an empty receiver.
#
# The accumulator starts as a copy of the first element, so the receiver and
# its elements are left untouched. Accumulating directly into the first
# element would grow it on every call.
def consolidate
  self.inject(nil) do |acc, e|
    if acc.nil?
      # clone (not dup) keeps singleton-level extensions of the first element
      # on the result; objects that cannot be concatenated are passed as-is.
      e.respond_to?(:concat) ? e.clone : e
    else
      acc.concat e
    end
  end
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Defines an entity property on this module from the given block.
#
# +name+ is either a String/Symbol, or a one-entry Hash <tt>{name => type}</tt>.
# A bare name is treated as type :both. Supported types:
#
# :both::         the block is installed directly under +name+.
# :array::        the block is installed as <tt>_ary_<name></tt>; +name+ raises
#                 unless the receiver is an Array.
# :single::       the block is installed as <tt>_single_<name></tt>; +name+ raises
#                 if the receiver is an Array.
# :single2array:: the block works on one entity; on an Array, +name+ collects
#                 the per-element results.
# :array2single:: the block works on an Array; on a single entity, +name+ runs
#                 the array version on the entity's +container+ (when that is
#                 an Array responding to it) or on +make_list+, and picks the
#                 value for this entity out of the result.
#
# Raises a RuntimeError when +name+ has an unsupported shape or when the type
# is not one of those listed above.
def property(name, &block)
  case
  when (Hash === name and name.size == 1)
    name, type = name.first
  when (String === name or Symbol === name)
    type = :both
  else
    raise "Format of name ( => type) not understood: #{name.inspect}"
  end

  name = name.to_s unless String === name

  case type
  when :both
    self.module_eval do define_method name, &block end
  when :array
    self.module_eval do
      ary_name = "_ary_#{name}"
      define_method ary_name, &block
      define_method name do |*args|
        raise "Method #{ name } only defined for array" unless Array === self
        self.send(ary_name, *args)
      end
    end
  when :single
    self.module_eval do
      single_name = "_single_#{name}"
      define_method single_name, &block
      define_method name do |*args|
        raise "Method #{ name } not defined for array" if Array === self
        self.send(single_name, *args)
      end
    end
  when :single2array
    self.module_eval do
      single_name = "_single_#{name}"
      define_method single_name, &block
      define_method name do |*args|
        if Array === self
          collect{|e| e.send(single_name, *args)}
        else
          self.send(single_name, *args)
        end
      end
    end
  when :array2single
    self.module_eval do
      ary_name = "_ary_#{name}"
      define_method ary_name, &block
      define_method name do |*args|
        case
        when Array === self
          self.send(ary_name, *args)
        when (Array === self.container and self.container.respond_to? ary_name)
          # Compute once on the containing list and pick this entity's value:
          # by key when the result is a Hash, by position otherwise.
          res = self.container.send(ary_name, *args)
          if Hash === res
            res[self]
          else
            pos = self.container.index self
            res[pos]
          end
        else
          # No usable container: wrap the entity in a list of its own.
          res = self.make_list.send(ary_name, *args)
          Hash === res ? res[self] : res[0]
        end
      end
    end
  else
    # Fail at definition time instead of silently defining nothing.
    raise "Type of property not understood: #{type.inspect}"
  end
end
|
32
121
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/entity/gene'
|
5
|
+
|
6
|
+
Workflow.require_workflow "Sequence"
|
7
|
+
|
8
|
+
module CNV
|
9
|
+
extend Entity
|
10
|
+
self.annotation :jobname
|
11
|
+
self.annotation :organism
|
12
|
+
|
13
|
+
self.format = "Copy Number Variation"
|
14
|
+
|
15
|
+
property :variation => :single2array do
|
16
|
+
self.split(":").last
|
17
|
+
end
|
18
|
+
|
19
|
+
property :loss? => :array2single do
|
20
|
+
@loss ||= self.variation.collect{|v| v =~/loss/i}
|
21
|
+
end
|
22
|
+
|
23
|
+
property :gain? => :array2single do
|
24
|
+
@gain ||= self.variation.collect{|v| v =~/gain/i}
|
25
|
+
end
|
26
|
+
|
27
|
+
property :genes => :array2single do
|
28
|
+
@genes ||= begin
|
29
|
+
genes = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self).run
|
30
|
+
genes.unnamed = true
|
31
|
+
genes = genes.values_at *self
|
32
|
+
Gene.setup(genes, "Ensembl Gene ID", organism)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -2,6 +2,8 @@ require 'rbbt/entity'
|
|
2
2
|
require 'rbbt/workflow'
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/sources/entrez'
|
5
|
+
require 'rbbt/entity/protein'
|
6
|
+
require 'rbbt/entity/pmid'
|
5
7
|
|
6
8
|
Workflow.require_workflow "Translation"
|
7
9
|
|
@@ -11,50 +13,96 @@ module Gene
|
|
11
13
|
self.annotation :format
|
12
14
|
self.annotation :organism
|
13
15
|
|
14
|
-
self.format = Organism::Hsa.identifiers.all_fields
|
16
|
+
self.format = Organism::Hsa.identifiers.all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
property :to! => :array2single do |new_format|
|
19
|
+
return self if format == new_format
|
20
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
18
21
|
end
|
19
22
|
|
20
|
-
|
21
|
-
if
|
22
|
-
|
23
|
-
else
|
24
|
-
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
25
|
-
gene.nil? ? nil : gene.summary
|
26
|
-
end
|
23
|
+
property :to => :array2single do |new_format|
|
24
|
+
return self if format == new_format
|
25
|
+
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
27
26
|
end
|
28
27
|
|
29
|
-
|
30
|
-
|
31
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
32
|
-
else
|
33
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => [self], :format => new_format).exec[self], new_format, organism)
|
34
|
-
end
|
28
|
+
property :ensembl => :array2single do
|
29
|
+
@ensembl ||= to "Ensembl Gene ID"
|
35
30
|
end
|
36
31
|
|
37
|
-
|
38
|
-
|
39
|
-
if Array === self
|
40
|
-
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
41
|
-
else
|
42
|
-
v = to!(new_format)
|
43
|
-
v.nil? ? nil : v.first
|
44
|
-
end
|
32
|
+
property :entrez => :array2single do
|
33
|
+
@entrez ||= to "Entrez Gene ID"
|
45
34
|
end
|
46
35
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
36
|
+
|
37
|
+
property :name => :array2single do
|
38
|
+
@name ||= to "Associated Gene Name"
|
39
|
+
end
|
40
|
+
|
41
|
+
# "Gene Start" coordinate (cast with :to_i) for every gene in the list, read
# from the Organism.gene_positions table of this list's organism.
# Memoized in @chr_start, as the other table-backed properties of this module
# are, so the table is not queried again on every call.
# NOTE(review): the lookup uses the list's own identifiers (values_at *self),
# not self.ensembl -- confirm callers always pass ids in the table's key format.
property :chr_start => :array2single do
  @chr_start ||= begin
    Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at(*self)
  end
end
|
46
|
+
|
47
|
+
property :go_bp_terms => :array2single do
|
48
|
+
@go_bp_terms ||= Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
49
|
+
end
|
50
|
+
|
51
|
+
property :long_name => :single2array do
|
52
|
+
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
53
|
+
gene.nil? ? nil : gene.description.flatten.first
|
54
|
+
end
|
55
|
+
|
56
|
+
property :description => :single2array do
|
57
|
+
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
58
|
+
gene.nil? ? nil : gene.summary.flatten.first
|
59
|
+
end
|
60
|
+
|
61
|
+
property :transcripts => :array2single do
|
62
|
+
gene_transcripts = Organism.gene_transcripts(organism).tsv :persist => true
|
63
|
+
gene_transcripts.unnamed = true
|
64
|
+
res = gene_transcripts.values_at(*self.ensembl)
|
65
|
+
res.each{|l| Transcript.setup(l, "Ensembl Transcript ID", organism)}
|
66
|
+
res
|
67
|
+
end
|
68
|
+
|
69
|
+
property :proteins => :array2single do
|
70
|
+
@proteins ||= begin
|
71
|
+
transcripts = self.transcripts
|
72
|
+
all_transcripts = Transcript.setup(transcripts.flatten, "Ensembl Transcript ID", organism)
|
73
|
+
transcript2protein = nil
|
74
|
+
|
75
|
+
transcript2protein = Misc.process_to_hash(all_transcripts){|list|
|
76
|
+
list.protein
|
77
|
+
}
|
78
|
+
|
79
|
+
res = nil
|
80
|
+
res = transcripts.collect{|list|
|
81
|
+
Protein.setup(transcript2protein.values_at(*list), "Ensembl Protein ID", organism)
|
82
|
+
}
|
83
|
+
|
84
|
+
res.each{|l|
|
85
|
+
}
|
86
|
+
res
|
87
|
+
end
|
54
88
|
end
|
55
89
|
|
56
|
-
|
90
|
+
property :max_transcript_length => :array2single do
|
91
|
+
transcripts.collect{|list| list.sequence_length.compact.max}
|
92
|
+
end
|
93
|
+
|
94
|
+
property :max_protein_length => :array2single do
|
95
|
+
@max_protein_length ||= begin
|
96
|
+
proteins = self.proteins
|
97
|
+
all_proteins = Protein.setup(proteins.flatten, "Ensembl Protein ID", organism)
|
98
|
+
lengths = Misc.process_to_hash(all_proteins){|list| list.sequence_length}
|
99
|
+
proteins.collect{|list| lengths.values_at(*list).compact.max}
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
property :chromosome => :array2single do
|
57
104
|
chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
|
105
|
+
chr.unnamed = true
|
58
106
|
if Array === self
|
59
107
|
to("Ensembl Gene ID").collect do |gene|
|
60
108
|
chr[gene]
|
@@ -64,39 +112,63 @@ module Gene
|
|
64
112
|
end
|
65
113
|
end
|
66
114
|
|
67
|
-
|
115
|
+
property :range => :array2single do
|
68
116
|
pos = Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
Range.new *pos[gene]
|
73
|
-
end
|
74
|
-
else
|
75
|
-
return nil if not pos.include? to("Ensembl Gene ID")
|
76
|
-
Range.new *pos[to("Ensembl Gene ID")]
|
117
|
+
to("Ensembl Gene ID").collect do |gene|
|
118
|
+
next if not pos.include? gene
|
119
|
+
Range.new *pos[gene]
|
77
120
|
end
|
78
121
|
end
|
79
122
|
|
123
|
+
property :articles => :array2single do
|
124
|
+
@articles ||= begin
|
125
|
+
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat).values_at *self.entrez)
|
126
|
+
end
|
127
|
+
end
|
80
128
|
end
|
81
129
|
|
82
130
|
module Transcript
|
83
131
|
extend Entity
|
84
132
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
133
|
+
self.annotation :format
|
134
|
+
self.annotation :organism
|
135
|
+
|
136
|
+
self.format = "Ensembl Transcript ID"
|
137
|
+
|
138
|
+
property :to! => :array2single do |new_format|
|
139
|
+
return self if format == new_format
|
140
|
+
Gene.setup(Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
91
141
|
end
|
92
142
|
|
93
|
-
|
143
|
+
property :to => :array2single do |new_format|
|
94
144
|
return self if format == new_format
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
145
|
+
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
146
|
+
end
|
147
|
+
|
148
|
+
def ensembl
|
149
|
+
to "Ensembl Transcript ID"
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
property :sequence => :array2single do
|
154
|
+
transcript_sequence = Organism.transcript_sequence(organism).tsv :persist => true
|
155
|
+
transcript_sequence.unnamed = true
|
156
|
+
transcript_sequence.values_at *self.ensembl
|
157
|
+
end
|
158
|
+
|
159
|
+
property :sequence_length => :array2single do
|
160
|
+
sequence.collect{|s|
|
161
|
+
s.nil? ? nil : s.length
|
162
|
+
}
|
163
|
+
end
|
164
|
+
|
165
|
+
property :protein => :array2single do
|
166
|
+
transcript_protein = Organism.transcripts(organism).tsv :single, :persist => true, :fields => ["Ensembl Protein ID"]
|
167
|
+
transcript_protein.unnamed = true
|
168
|
+
|
169
|
+
res = transcript_protein.values_at(*self.ensembl)
|
170
|
+
Protein.setup(res, "Ensembl Protein ID", organism)
|
171
|
+
res
|
101
172
|
end
|
102
173
|
end
|
174
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/mutation/mutation_assessor'
|
5
|
+
require 'rbbt/entity/protein'
|
6
|
+
require 'rbbt/entity/gene'
|
7
|
+
require 'rbbt/entity/mutated_isoform'
|
8
|
+
|
9
|
+
Workflow.require_workflow "Sequence"
|
10
|
+
|
11
|
+
module GenomicMutation
|
12
|
+
extend Entity
|
13
|
+
self.annotation :jobname
|
14
|
+
self.annotation :organism
|
15
|
+
self.annotation :watson
|
16
|
+
|
17
|
+
self.format = "Genomic Mutation"
|
18
|
+
|
19
|
+
property :score => :single2array do
|
20
|
+
self.split(":")[3].to_f
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
property :position => :single2array do
|
25
|
+
self.split(":")[1].to_i
|
26
|
+
end
|
27
|
+
|
28
|
+
property :offset_in_genes => :array2single do
|
29
|
+
gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
|
30
|
+
position.zip(genes).collect{|position, list|
|
31
|
+
list.collect{|gene|
|
32
|
+
next if not gene2chr_start.include? gene
|
33
|
+
[gene, position.to_i - gene2chr_start[gene]] * ":"
|
34
|
+
}.compact
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
property :genes => :array2single do
|
39
|
+
@genes ||= begin
|
40
|
+
genes = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self).run
|
41
|
+
genes.unnamed = true
|
42
|
+
genes = genes.values_at *self
|
43
|
+
Gene.setup(genes, "Ensembl Gene ID", organism)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Mutated isoforms for each genomic mutation in the list.
#
# Runs the Sequence workflow job :mutated_isoforms_for_genomic_mutations and
# takes, for every mutation in self, its entry from the job result. Every
# non-nil entry gets this list's organism assigned. Memoized in
# @mutated_isoforms.
property :mutated_isoforms => :array2single do
  @mutated_isoforms ||= begin
    res = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self).run.values_at(*self)
    # The properties built on top of this one explicitly handle nil entries
    # (list.nil? / compact), so entries may be missing here: skip them
    # rather than failing on nil.
    res.each{|list| list.organism = organism unless list.nil?}
    res
  end
end
|
54
|
+
|
55
|
+
property :exon_junctions do
|
56
|
+
@exon_junctions ||= Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at *self
|
57
|
+
end
|
58
|
+
|
59
|
+
property :in_exon_junction? => :array2single do
|
60
|
+
exon_junctions.collect{|l| not l.nil? and not l.empty?}
|
61
|
+
end
|
62
|
+
|
63
|
+
property :over_gene? => :array2single do |gene|
|
64
|
+
@over_genes ||= {}
|
65
|
+
@over_genes[gene] ||= genes.clean_annotations.collect{|list| list.include? gene}
|
66
|
+
end
|
67
|
+
|
68
|
+
# Mutation Assessor scores for the mutated isoforms of each genomic mutation.
#
# All isoforms of all mutations are scored in a single batch, then the scores
# are regrouped per mutation. Mutations whose isoform list is nil get an
# empty list. Memoized in @mutation_assessor_scores.
property :mutation_assessor_scores => :array2single do
  @mutation_assessor_scores ||= begin
    mutated_isoforms = self.mutated_isoforms
    all_mutated_isoforms = MutatedIsoform.setup(mutated_isoforms.flatten.compact, organism)
    # Score the list handed to the block, as every other process_to_hash call
    # in these entities does, so keys and values always come from the same list.
    mutated_isoform2damage_score = Misc.process_to_hash(all_mutated_isoforms){|list| list.mutation_assessor_scores}

    MutatedIsoform.setup(mutated_isoforms.collect{|list| list.nil? ? [] : mutated_isoform2damage_score.values_at(*list)}, organism)
  end
end
|
77
|
+
|
78
|
+
# Truncation flags for the mutated isoforms of each genomic mutation.
#
# All isoforms of all mutations are evaluated in a single batch through
# MutatedIsoform#truncated, then the flags are regrouped per mutation.
# Mutations whose isoform list is nil get an empty list. Memoized in
# @truncated.
property :truncated do
  @truncated ||= begin
    mutated_isoforms = self.mutated_isoforms
    all_mutated_isoforms = MutatedIsoform.setup(mutated_isoforms.flatten.compact, organism)
    # Evaluate the list handed to the block, as every other process_to_hash
    # call in these entities does, so keys and values come from the same list.
    mutated_isoform2truncated = Misc.process_to_hash(all_mutated_isoforms){|list| list.truncated}
    mutated_isoforms.collect{|list| list.nil? ? [] : mutated_isoform2truncated.values_at(*list)}
  end
end
|
86
|
+
|
87
|
+
property :affected_exons => :array2single do
|
88
|
+
@affected_exons ||= begin
|
89
|
+
Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at *self
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
data/lib/rbbt/entity/genotype.rb
CHANGED
@@ -1,186 +1,162 @@
|
|
1
|
-
require 'rbbt/entity'
|
2
1
|
require 'rbbt/workflow'
|
3
|
-
require 'rbbt/
|
4
|
-
require 'rbbt/
|
5
|
-
require 'rbbt/entity/protein'
|
6
|
-
|
7
|
-
Workflow.require_workflow "Sequence"
|
8
|
-
|
9
|
-
module MutatedIsoform
|
10
|
-
extend Entity
|
11
|
-
self.annotation :organism
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/entity/genomic_mutation'
|
12
4
|
|
13
|
-
|
5
|
+
module Genotype
|
6
|
+
extend Workflow
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
else
|
19
|
-
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
8
|
+
if self.respond_to? :extended
|
9
|
+
class << self
|
10
|
+
alias prev_genotype_extended extended
|
20
11
|
end
|
21
12
|
end
|
22
13
|
|
23
|
-
def
|
24
|
-
if
|
25
|
-
|
26
|
-
|
27
|
-
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
28
|
-
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
14
|
+
def self.extended(base)
|
15
|
+
prev_genotype_extended(base) if self.respond_to? :prev_genotype_extended
|
16
|
+
base.helper :genotype do
|
17
|
+
base
|
29
18
|
end
|
30
19
|
end
|
31
20
|
|
32
|
-
|
33
|
-
|
34
|
-
prot, change = self.split(":")
|
35
|
-
|
36
|
-
case
|
37
|
-
when change =~ /UTR/
|
38
|
-
"UTR"
|
39
|
-
when (change[0] == ASTERISK and not change[0] == change[-1])
|
40
|
-
"NOSTOP"
|
41
|
-
when (change[-1] == ASTERISK and not change[0] == change[-1])
|
42
|
-
"NONSENSE"
|
43
|
-
when change =~ /Indel/
|
44
|
-
"INDEL"
|
45
|
-
when change =~ /FrameShift/
|
46
|
-
"FRAMESHIFT"
|
47
|
-
when change[0] == change[-1]
|
48
|
-
"SYNONYMOUS"
|
49
|
-
else
|
50
|
-
"MISS-SENSE"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def ary_type
|
55
|
-
self.collect{|mutation| mutation.single_type}
|
56
|
-
end
|
57
|
-
|
21
|
+
module Cohort
|
22
|
+
extend Workflow
|
58
23
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def filter(*types)
|
64
|
-
list = self.zip(type).select do |mutation, type|
|
65
|
-
types.include? type
|
66
|
-
end.collect{|mutation, type| mutation}
|
67
|
-
|
68
|
-
MutatedIsoform.setup(list, organism)
|
69
|
-
end
|
70
|
-
|
71
|
-
def self2mutation_assessor_prediction
|
72
|
-
if Array === self
|
73
|
-
filtered = filter "MISS-SENSE"
|
74
|
-
correspondance = {}
|
75
|
-
mutations = filtered.zip(filtered.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
76
|
-
prot, change = mutation.split(":")
|
77
|
-
next if uniprot.nil?
|
78
|
-
uniprot_change = [uniprot, change]
|
79
|
-
correspondance[uniprot_change] = mutation
|
80
|
-
uniprot_change
|
81
|
-
end.compact
|
82
|
-
|
83
|
-
tsv = MutationAssessor.chunked_predict(mutations)
|
84
|
-
return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if tsv.empty?
|
85
|
-
tsv.add_field "Mutated Isoform" do |key, values|
|
86
|
-
correspondance[key.split(" ")]
|
24
|
+
if self.respond_to? :extended
|
25
|
+
class << self
|
26
|
+
alias prev_genotype_cohort_extended extended
|
87
27
|
end
|
88
|
-
tsv.reorder "Mutated Isoform", ["Func. Impact"]
|
89
|
-
else
|
90
|
-
prot, change = mutation.split(":")
|
91
|
-
uniprot = protein.to "UniProt/SwissProt ID"
|
92
|
-
mutations = [uniprot, change]
|
93
|
-
|
94
|
-
tsv = MutationAssessor.chunked_predict(mutations)
|
95
|
-
tsv.add_field "Mutated Isoform" do |key, values|
|
96
|
-
self
|
97
|
-
end
|
98
|
-
tsv.reorder "Mutated Isoform", ["Func. Impact"]
|
99
28
|
end
|
100
|
-
end
|
101
29
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
30
|
+
def self.extended(base)
|
31
|
+
prev_genotype_cohort_extended(base) if self.respond_to? :prev_genotype_cohort_extended
|
32
|
+
|
33
|
+
class << base
|
34
|
+
attr_accessor :metagenotype
|
35
|
+
|
36
|
+
def jobname
|
37
|
+
if @jobname.nil?
|
38
|
+
@jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname} * ", "
|
39
|
+
@jobname[100..-1] = " (etc; #{self.length} genotypes)" if @jobname.length > 100
|
40
|
+
end
|
41
|
+
@jobname
|
42
|
+
end
|
43
|
+
|
44
|
+
def metagenotype
|
45
|
+
if @metagenotype.nil?
|
46
|
+
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].watson)
|
47
|
+
@metagenotype.extend Genotype unless Genotype === @metagenotype
|
48
|
+
end
|
49
|
+
@metagenotype
|
50
|
+
end
|
51
|
+
end unless base.respond_to? :metagenotype
|
52
|
+
|
53
|
+
base.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
|
54
|
+
|
55
|
+
base.helper :metagenotype do
|
56
|
+
base.metagenotype
|
110
57
|
end
|
111
|
-
}
|
112
|
-
end
|
113
58
|
|
114
|
-
|
115
|
-
|
116
|
-
filter("FRAMESHIFT").select{|isoform_mutation|
|
117
|
-
protein, mutation = isoform_mutation.split ":"
|
118
|
-
if protein_sequences.include? protein
|
119
|
-
mutation.match(/(\d+)/)[1].to_f < protein_sequences[protein].length.to_f * 0.7
|
120
|
-
else
|
121
|
-
false
|
59
|
+
base.helper :samples do
|
60
|
+
base
|
122
61
|
end
|
123
|
-
}
|
124
|
-
end
|
125
62
|
|
126
|
-
|
127
|
-
|
63
|
+
NamedArray.setup(base, base.collect{|base| base.jobname})
|
64
|
+
end
|
65
|
+
|
66
|
+
returns "Ensembl Gene ID"
|
67
|
+
task :all_affected_genes => :array do
|
68
|
+
set_info :organism, metagenotype.organism
|
69
|
+
samples.collect{|genotype| genotype.all_affected_genes}.flatten.uniq
|
70
|
+
end
|
128
71
|
|
129
|
-
|
130
|
-
|
72
|
+
returns "Ensembl Gene ID"
|
73
|
+
task :damaged_genes => :array do
|
74
|
+
set_info :organism, metagenotype.organism
|
75
|
+
samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
returns "Ensembl Gene ID"
|
80
|
+
task :recurrent_genes => :array do
|
81
|
+
set_info :organism, metagenotype.organism
|
82
|
+
count = Hash.new(0)
|
83
|
+
samples.each do |genotype| genotype.genes.flatten.uniq.each{|gene| count[gene] += 1} end
|
84
|
+
count.select{|gene, c| c > 1}.collect{|gene,c| gene.dup}
|
85
|
+
end
|
131
86
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
value = levels.index(v[0].to_s)
|
137
|
-
value and value >= cutoff
|
87
|
+
%w(damaged_genes recurrent_genes all_affected_genes).each do |name|
|
88
|
+
define_method name do |*args|
|
89
|
+
@cache ||= {}
|
90
|
+
@cache[[name, args]] ||= self.job(name, self.jobname).run
|
138
91
|
end
|
139
|
-
|
92
|
+
end
|
140
93
|
|
141
|
-
|
142
|
-
predicted += early_frameshifts if options[:frameshift]
|
94
|
+
end
|
143
95
|
|
144
|
-
|
96
|
+
returns "Ensembl Gene ID"
|
97
|
+
task :all_affected_genes => :array do
|
98
|
+
set_info :organism, genotype.organism
|
99
|
+
genotype.genes.clean_annotations.flatten.uniq
|
145
100
|
end
|
146
|
-
end
|
147
101
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
102
|
+
dep :all_affected_genes
|
103
|
+
returns "Ensembl Gene ID"
|
104
|
+
task :long_genes => :array do
|
105
|
+
all_affected_genes = step(:all_affected_genes).load
|
106
|
+
long_genes = all_affected_genes.select{|gene|
|
107
|
+
length = gene.max_protein_length
|
108
|
+
length and length > 1000 or gene.name =~ /^PCDH/
|
109
|
+
}
|
152
110
|
|
153
|
-
|
111
|
+
set_info :organism, genotype.organism
|
112
|
+
long_genes.clean_annotations
|
113
|
+
end
|
154
114
|
|
155
|
-
|
156
|
-
|
115
|
+
returns "Ensembl Gene ID"
|
116
|
+
task :mutations_in_exon_junctions => :array do
|
117
|
+
set_info :organism, genotype.organism
|
118
|
+
genotype.select{|mutation| mutation.in_exon_junction?}.clean_annotations
|
157
119
|
end
|
158
120
|
|
159
|
-
|
160
|
-
|
121
|
+
returns "Ensembl Gene ID"
|
122
|
+
input :threshold, :float, "from 0 to 1", 0.5
|
123
|
+
task :with_damaged_isoforms => :array do |threshold|
|
124
|
+
set_info :organism, genotype.organism
|
125
|
+
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
|
126
|
+
genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
|
161
127
|
end
|
162
128
|
|
163
|
-
|
164
|
-
|
129
|
+
returns "Ensembl Gene ID"
|
130
|
+
# Genes (as "Ensembl Gene ID") having at least one mutated isoform flagged as
# truncated. The isoforms are set up with the genotype's own organism, the
# same one recorded with set_info, instead of a fixed organism build.
task :truncated => :array do
  set_info :organism, genotype.organism
  MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, genotype.organism).
    select{|isoform_mutation| isoform_mutation.truncated }.
    protein.gene.to("Ensembl Gene ID").uniq.clean_annotations
end
|
166
136
|
|
167
|
-
|
168
|
-
|
137
|
+
returns "Ensembl Gene ID"
|
138
|
+
task :affected_exon_junctions => :array do
|
139
|
+
set_info :organism, genotype.organism
|
140
|
+
genotype.select{|mutation| mutation.in_exon_junction?}.genes.flatten.clean_annotations
|
169
141
|
end
|
170
142
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
143
|
+
dep :with_damaged_isoforms, :truncated, :affected_exon_junctions
|
144
|
+
returns "Ensembl Gene ID"
|
145
|
+
task :damaged_genes => :array do
|
146
|
+
set_info :organism, genotype.organism
|
147
|
+
|
148
|
+
with_damaged_isoforms = step(:with_damaged_isoforms).load.clean_annotations
|
149
|
+
truncated = step(:truncated).load.clean_annotations
|
150
|
+
affected_exon_junctions = step(:affected_exon_junctions).load.clean_annotations
|
151
|
+
|
152
|
+
(with_damaged_isoforms + truncated + affected_exon_junctions).uniq
|
178
153
|
end
|
179
154
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
155
|
+
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
|
156
|
+
define_method name do |*args|
|
157
|
+
@cache ||= {}
|
158
|
+
@cache[[name, args]] ||= self.job(name, self.jobname).run
|
159
|
+
end
|
185
160
|
end
|
186
161
|
end
|
162
|
+
|
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/mutation/mutation_assessor'
|
5
|
+
require 'rbbt/mutation/sift'
|
6
|
+
require 'rbbt/entity/protein'
|
7
|
+
require 'rbbt/entity/gene'
|
8
|
+
require 'nokogiri'
|
9
|
+
|
10
|
+
module MutatedIsoform
|
11
|
+
extend Entity
|
12
|
+
self.annotation :organism
|
13
|
+
|
14
|
+
self.format = "Mutated Isoform"
|
15
|
+
|
16
|
+
property :protein do
|
17
|
+
if Array === self
|
18
|
+
Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
|
19
|
+
else
|
20
|
+
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
property :change => :single2array do
|
25
|
+
self.split(":").last
|
26
|
+
end
|
27
|
+
|
28
|
+
property :position => :single2array do
|
29
|
+
if change.match(/[^\d](\d+)[^\d]/)
|
30
|
+
$1.to_i
|
31
|
+
else
|
32
|
+
nil
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
property :ensembl_protein_image_url => :single2array do
|
37
|
+
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
38
|
+
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
39
|
+
end
|
40
|
+
|
41
|
+
# SVG text of the Ensembl protein image with a translucent red, one-unit-wide
# vertical bar added at the horizontal offset that corresponds to this
# mutation's position along the protein sequence.
#
# Returns the downloaded SVG unchanged when the <svg> root, the 'rect.ac'
# element, the protein sequence length or the mutation position is missing.
property :marked_svg => :single2array do
  svg = Open.read(protein.ensembl_protein_image_url)
  seq_len = protein.sequence_length
  position = self.position

  doc = Nokogiri::XML(svg)
  svg_node = doc.css('svg').first
  track = doc.css('rect.ac').first

  # Test the nodes themselves: values obtained through to_f are never nil,
  # while a missing node would fail on the attr call.
  if svg_node and track and seq_len and position
    width = svg_node.attr('width').to_f
    height = svg_node.attr('height').to_f
    start = track.attr('x').to_f

    offset = (width - start)/seq_len * position + start
    # The marker is self-closed: an unclosed <rect> right before </svg>
    # leaves the document ill-formed.
    svg.sub(/<\/svg>/,"<rect x='#{offset}' y='1' width='1' height='#{height}' style='fill:rgb(255,0,0);opacity:0.5;stroke:none;'/></svg>")
  else
    svg
  end
end
|
59
|
+
|
60
|
+
ASTERISK = "*"[0]
|
61
|
+
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
62
|
+
property :consecuence => :single2array do
|
63
|
+
prot, change = self.split(":")
|
64
|
+
|
65
|
+
case
|
66
|
+
when change =~ /UTR/
|
67
|
+
"UTR"
|
68
|
+
when (change[0] == ASTERISK and not change[0] == change[-1])
|
69
|
+
"NOSTOP"
|
70
|
+
when (change[-1] == ASTERISK and not change[0] == change[-1])
|
71
|
+
"NONSENSE"
|
72
|
+
when change =~ /Indel/
|
73
|
+
"INDEL"
|
74
|
+
when change =~ /FrameShift/
|
75
|
+
"FRAMESHIFT"
|
76
|
+
when change[0] == change[-1]
|
77
|
+
"SYNONYMOUS"
|
78
|
+
else
|
79
|
+
"MISS-SENSE"
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
property :truncated => :array2single do
|
84
|
+
@truncated ||= begin
|
85
|
+
protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
|
86
|
+
self.collect do |isoform_mutation|
|
87
|
+
|
88
|
+
next if isoform_mutation.consecuence != "FRAMESHIFT" and isoform_mutation.consecuence != "NONSENSE"
|
89
|
+
protein = isoform_mutation.protein
|
90
|
+
position = isoform_mutation.position
|
91
|
+
sequence_length = protein2sequence_length[protein]
|
92
|
+
|
93
|
+
case
|
94
|
+
when (sequence_length.nil? or position.nil?)
|
95
|
+
nil
|
96
|
+
when position < sequence_length.to_f * 0.7
|
97
|
+
true
|
98
|
+
else
|
99
|
+
false
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
property :damage_scores => :array2single do
|
107
|
+
@damage_scores ||= begin
|
108
|
+
sift_scores.zip(mutation_assessor_scores).collect{|p|
|
109
|
+
p = p.compact
|
110
|
+
if p.empty?
|
111
|
+
nil
|
112
|
+
else
|
113
|
+
p.inject(0.0){|acc, e| acc += e} / p.length
|
114
|
+
end
|
115
|
+
}
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
property :sift_scores => :array2single do
|
120
|
+
@sift_scores ||= begin
|
121
|
+
missense = self.select{|iso_mut| iso_mut.consecuence == "MISS-SENSE"}
|
122
|
+
|
123
|
+
values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
|
124
|
+
v.nil? ? nil : v["Prediction"]
|
125
|
+
}
|
126
|
+
|
127
|
+
range = {nil => nil,
|
128
|
+
"" => nil,
|
129
|
+
"TOLERATED" => 0,
|
130
|
+
"*DAMAGING" => 1,
|
131
|
+
"DAMAGING" => 1}
|
132
|
+
|
133
|
+
range.values_at *values
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
property :mutation_assessor_scores => :array2single do
|
138
|
+
@mutation_assesor_scores ||= begin
|
139
|
+
missense = self.select{|mutation| mutation.consecuence == "MISS-SENSE"}
|
140
|
+
|
141
|
+
correspondance = {}
|
142
|
+
list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
143
|
+
prot, change = mutation.split(":")
|
144
|
+
next if uniprot.nil?
|
145
|
+
uniprot_change = [uniprot, change]
|
146
|
+
correspondance[uniprot_change] ||= []
|
147
|
+
correspondance[uniprot_change] << mutation
|
148
|
+
uniprot_change
|
149
|
+
end.compact
|
150
|
+
|
151
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
|
152
|
+
return [nil] * self.length if list.empty?
|
153
|
+
|
154
|
+
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
155
|
+
|
156
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
|
157
|
+
return [nil] * self.length if tsv.empty?
|
158
|
+
|
159
|
+
new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
|
160
|
+
|
161
|
+
tsv.each do |key, values|
|
162
|
+
correspondance[key.split(" ")].each do |mutation|
|
163
|
+
new[mutation] = values["Func. Impact"]
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
range = {nil => nil,
|
169
|
+
"" => nil,
|
170
|
+
"neutral" => 0,
|
171
|
+
"low" => 0.3,
|
172
|
+
"medium" => 0.6,
|
173
|
+
"high" => 1}
|
174
|
+
|
175
|
+
range.values_at *new.values_at(*self)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/sources/pubmed'
|
3
|
+
|
4
|
+
module PMID
|
5
|
+
extend Entity
|
6
|
+
|
7
|
+
self.format = "PMID"
|
8
|
+
|
9
|
+
# Titles of the articles for the PMIDs in self, in the same order.
# Articles come from PubMed.get_article(self); an id with no article gives nil.
# The result is cached in @title.
property :title => :array2single do
|
10
|
+
@title ||= begin
|
11
|
+
PubMed.get_article(self).values_at(*self).collect{|article| article.nil? ? nil : article.title}
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# Builds an HTML anchor (class 'pmid') whose href points to the NCBI PubMed
# page for this id and whose link text is the id itself.
property :pubmed_url => :single2array do
|
16
|
+
"<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
data/lib/rbbt/entity/protein.rb
CHANGED
@@ -3,6 +3,7 @@ require 'rbbt/workflow'
|
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/statistics/hypergeometric'
|
5
5
|
require 'rbbt/network/paths'
|
6
|
+
require 'rbbt/entity/gene'
|
6
7
|
|
7
8
|
Workflow.require_workflow "Translation"
|
8
9
|
|
@@ -14,17 +15,48 @@ module Protein
|
|
14
15
|
self.annotation :format
|
15
16
|
self.annotation :organism
|
16
17
|
|
17
|
-
|
18
|
-
|
18
|
+
self.format = "Ensembl Protein ID"
|
19
|
+
|
20
|
+
def ensembl
|
21
|
+
to "Ensembl Protein ID"
|
22
|
+
end
|
23
|
+
|
24
|
+
# Builds the Ensembl TranslationImage URL (export=svg) for this protein's
# Ensembl Protein ID. The host is www.ensembl.org when organism is "Hsa";
# otherwise it is "<part of organism after the last '/'>.archive.ensembl.org".
# NOTE(review): the URL path is hard-coded to Homo_sapiens regardless of
# organism — confirm this is intended for non-human organisms.
property :ensembl_protein_image_url => :single2array do
|
25
|
+
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
26
|
+
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{ensembl};_rmd=d2a8;export=svg"
|
19
27
|
end
|
20
28
|
|
21
|
-
|
29
|
+
# Translates these proteins to new_format by running the Translation
# workflow's :translate_protein job and wraps the result with Protein.setup
# (format new_format, same organism). Returns self when format already
# equals new_format.
property :to! => :array2single do |new_format|
|
22
30
|
return self if format == new_format
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
31
|
+
Protein.setup(Translation.job(:translate_protein, "", :organism => organism, :proteins => self, :format => new_format).exec, new_format, organism)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Same translation as to!, but each non-nil translated value is replaced by
# its first element (nil stays nil). Returns self when format already
# equals new_format.
property :to => :array2single do |new_format|
|
35
|
+
return self if format == new_format
|
36
|
+
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
37
|
+
end
|
38
|
+
|
39
|
+
# Gene entities for these proteins: converts to "Ensembl Protein ID", strips
# the annotations with clean_annotations and passes the result to Gene.setup
# with that format and the same organism.
property :gene do
|
40
|
+
Gene.setup(to("Ensembl Protein ID").clean_annotations, "Ensembl Protein ID", organism)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Pfam entries for these proteins: looks up self in the flat TSV loaded from
# Organism.gene_pfam(organism), flattens the values and passes them to
# Pfam.setup.
# NOTE(review): the index comes from gene_pfam while the lookup keys are
# protein ids — confirm the index key field matches.
property :pfam => :array2single do
|
44
|
+
index = Organism.gene_pfam(organism).tsv :flat, :persist => true
|
45
|
+
index.unnamed = true
|
46
|
+
pfam = index.values_at(*self).flatten
|
47
|
+
Pfam.setup pfam
|
48
|
+
end
|
49
|
+
|
50
|
+
# Sequences for these proteins, looked up by Ensembl Protein ID in the TSV
# loaded from Organism.protein_sequence(organism). Cached in @protein_sequence.
property :sequence => :array2single do
|
51
|
+
@protein_sequence ||= begin
|
52
|
+
protein_sequence = Organism.protein_sequence(organism).tsv :persist => true
|
53
|
+
protein_sequence.unnamed = true
|
54
|
+
protein_sequence.values_at(*self.ensembl)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
# Length of each protein sequence; nil where the sequence is nil.
property :sequence_length => :array2single do
|
59
|
+
sequence.collect{|seq| seq.nil? ? nil : seq.length}
|
28
60
|
end
|
29
61
|
end
|
30
62
|
|
@@ -6,9 +6,49 @@ require 'test/unit'
|
|
6
6
|
require 'rbbt/entity/gene'
|
7
7
|
|
8
8
|
class TestGene < Test::Unit::TestCase
|
9
|
+
CDK5 = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
|
10
|
+
TP53 = Gene.setup("TP53", "Associated Gene Name", "Hsa")
|
11
|
+
TWO = Gene.setup(["CDK5", "TP53"], "Associated Gene Name", "Hsa")
|
12
|
+
|
9
13
|
def test_to
|
10
14
|
assert_equal "1020", Gene.setup("CDK5", "Associated Gene Name", "Hsa").to("Entrez Gene ID")
|
11
15
|
end
|
16
|
+
|
17
|
+
def test_long_name
|
18
|
+
assert_equal "cyclin-dependent kinase 5", Gene.setup("CDK5", "Associated Gene Name", "Hsa").long_name
|
19
|
+
assert_equal ["cyclin-dependent kinase 5"], Gene.setup(["CDK5"], "Associated Gene Name", "Hsa").long_name
|
20
|
+
|
21
|
+
assert_match /tumor/, Gene.setup("TP53", "Associated Gene Name", "Hsa").description
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_transcripts
|
25
|
+
assert CDK5.transcripts.length > 1
|
26
|
+
assert_equal "Hsa", CDK5.transcripts.organism
|
27
|
+
assert_equal "Hsa", CDK5.make_list.transcripts.first.organism
|
28
|
+
assert_equal "Hsa", CDK5.transcripts.make_list.organism
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_proteins
|
32
|
+
assert CDK5.proteins.length > 1
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_max_protein_length
|
36
|
+
assert CDK5.max_protein_length > 200
|
37
|
+
assert Array === TWO.max_protein_length
|
38
|
+
assert TWO.max_protein_length.first > 200
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_max_transcript_length
|
42
|
+
assert CDK5.max_transcript_length > 200
|
43
|
+
assert Array === TWO.max_transcript_length
|
44
|
+
assert TWO.max_transcript_length.first > 200
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_range
|
48
|
+
assert Range === CDK5.range
|
49
|
+
assert Range === CDK5.make_list.range.first
|
50
|
+
end
|
51
|
+
|
12
52
|
end
|
13
53
|
|
14
54
|
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/genomic_mutation'
|
7
|
+
|
8
|
+
class TestGenomicMutation < Test::Unit::TestCase
|
9
|
+
MUTATION = GenomicMutation.setup("10:124745844:A:158", "Test", "Hsa/jun2011")
|
10
|
+
SPLICING = GenomicMutation.setup("18:14787040:A", "Test", "Hsa/jun2011")
|
11
|
+
GENOTYPE = GenomicMutation.setup(Rbbt.data.genotype.list, "Test", "Hsa/jun2011")
|
12
|
+
|
13
|
+
def test_genes
|
14
|
+
assert GENOTYPE.genes.flatten.to("Associated Gene Name").include? "PSTK"
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_consolidate
|
18
|
+
assert GENOTYPE.genes.consolidate.to("Associated Gene Name").include? "PSTK"
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_mutated_isoforms
|
22
|
+
assert MUTATION.mutated_isoforms.length > 1
|
23
|
+
# assert_equal actually compares the gene names; a bare `assert` would treat
# the array literal as the (always truthy) condition and never fail.
assert_equal ["PSTK"], MUTATION.mutated_isoforms.protein.gene.to("Associated Gene Name").uniq
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_exon_junction
|
27
|
+
assert(!(MUTATION.in_exon_junction?))
|
28
|
+
assert SPLICING.in_exon_junction?
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_over_gene
|
32
|
+
assert MUTATION.over_gene? Gene.setup("PSTK", "Associated Gene Name", "Hsa/jun2011").ensembl
|
33
|
+
assert(!(SPLICING.over_gene? Gene.setup("PSTK", "Associated Gene Name", "Hsa/jun2011").ensembl))
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/mutated_isoform'
|
7
|
+
|
8
|
+
class TestMutatedIsoform < Test::Unit::TestCase
|
9
|
+
MUTATION = MutatedIsoform.setup("ENSP00000275493:G719A", "Hsa/jun2011")
|
10
|
+
|
11
|
+
def test_protein
|
12
|
+
assert_equal "EGFR", MUTATION.protein.gene.to("Associated Gene Name")
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_truncated
|
16
|
+
|
17
|
+
protein = Protein.setup(Gene.setup("CDK5", "Associated Gene Name", "Hsa/jun2011").to("Ensembl Protein ID"), "Ensembl Protein ID", "Hsa/jun2011")
|
18
|
+
|
19
|
+
change_position = (protein.sequence_length.to_f * 0.5).to_i
|
20
|
+
wildtype = protein.sequence[(change_position..change_position)]
|
21
|
+
mutation = "*"
|
22
|
+
new_mutation = wildtype << change_position.to_s << mutation
|
23
|
+
mutation = MutatedIsoform.setup([protein, new_mutation] * ":", "Hsa/jun2011")
|
24
|
+
assert mutation.truncated
|
25
|
+
|
26
|
+
|
27
|
+
change_position = (protein.sequence_length.to_f * 0.9).to_i
|
28
|
+
wildtype = protein.sequence[(change_position..change_position)]
|
29
|
+
mutation = "*"
|
30
|
+
new_mutation = wildtype << change_position.to_s << mutation
|
31
|
+
mutation = MutatedIsoform.setup([protein, new_mutation] * ":", "Hsa/jun2011")
|
32
|
+
assert !mutation.truncated
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/protein'
|
7
|
+
|
8
|
+
class TestProtein < Test::Unit::TestCase
|
9
|
+
PROTEIN = Protein.setup("ENSP00000275493", "Hsa/jun2011")
|
10
|
+
PROTEIN_ARRAY = Protein.setup(["ENSP00000275493"], "Hsa/jun2011")
|
11
|
+
|
12
|
+
def test_clean_annotations
|
13
|
+
assert Protein === PROTEIN
|
14
|
+
assert(!(Protein === PROTEIN.clean_annotations))
|
15
|
+
assert Gene === PROTEIN.gene
|
16
|
+
assert(!(Protein === PROTEIN.gene))
|
17
|
+
assert(PROTEIN_ARRAY.respond_to? :annotated_array_clean_each)
|
18
|
+
assert(!(PROTEIN_ARRAY.clean_annotations.respond_to? :annotated_array_clean_each))
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_gene
|
22
|
+
assert_equal "EGFR", PROTEIN.gene.to("Associated Gene Name")
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
|
File without changes
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-entities
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
|
-
- 0
|
8
7
|
- 1
|
9
8
|
- 0
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-11-17 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -71,12 +71,20 @@ extra_rdoc_files:
|
|
71
71
|
files:
|
72
72
|
- LICENSE
|
73
73
|
- lib/rbbt/entity.rb
|
74
|
+
- lib/rbbt/entity/cnv.rb
|
74
75
|
- lib/rbbt/entity/gene.rb
|
76
|
+
- lib/rbbt/entity/genomic_mutation.rb
|
75
77
|
- lib/rbbt/entity/genotype.rb
|
76
78
|
- lib/rbbt/entity/misc.rb
|
79
|
+
- lib/rbbt/entity/mutated_isoform.rb
|
80
|
+
- lib/rbbt/entity/pmid.rb
|
77
81
|
- lib/rbbt/entity/protein.rb
|
78
82
|
- test/test_helper.rb
|
79
83
|
- test/rbbt/entity/test_gene.rb
|
84
|
+
- test/rbbt/entity/test_genomic_mutation.rb
|
85
|
+
- test/rbbt/entity/test_mutated_isoform.rb
|
86
|
+
- test/rbbt/entity/test_protein.rb
|
87
|
+
- test/rbbt/test_entity.rb
|
80
88
|
has_rdoc: true
|
81
89
|
homepage: http://github.com/mikisvaz/rbbt-util
|
82
90
|
licenses: []
|
@@ -114,3 +122,7 @@ summary: Entities for the Ruby Bioinformatics Toolkit (rbbt)
|
|
114
122
|
test_files:
|
115
123
|
- test/test_helper.rb
|
116
124
|
- test/rbbt/entity/test_gene.rb
|
125
|
+
- test/rbbt/entity/test_genomic_mutation.rb
|
126
|
+
- test/rbbt/entity/test_mutated_isoform.rb
|
127
|
+
- test/rbbt/entity/test_protein.rb
|
128
|
+
- test/rbbt/test_entity.rb
|