rbbt-entities 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/entity.rb +89 -0
- data/lib/rbbt/entity/cnv.rb +37 -0
- data/lib/rbbt/entity/gene.rb +126 -54
- data/lib/rbbt/entity/genomic_mutation.rb +93 -0
- data/lib/rbbt/entity/genotype.rb +124 -148
- data/lib/rbbt/entity/mutated_isoform.rb +179 -0
- data/lib/rbbt/entity/pmid.rb +19 -0
- data/lib/rbbt/entity/protein.rb +40 -8
- data/test/rbbt/entity/test_gene.rb +40 -0
- data/test/rbbt/entity/test_genomic_mutation.rb +38 -0
- data/test/rbbt/entity/test_mutated_isoform.rb +37 -0
- data/test/rbbt/entity/test_protein.rb +27 -0
- data/test/rbbt/test_entity.rb +0 -0
- metadata +16 -4
data/lib/rbbt/entity.rb
CHANGED
@@ -27,6 +27,95 @@ module Entity
|
|
27
27
|
Entity.formats[format] = self
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
# Returns a copy of the receiver built without reusing the receiver object
# itself.
#
# * Array receivers delegate to +annotated_array_clean_collect+, cleaning
#   every element that itself responds to +clean_annotations+ and passing
#   the rest through untouched.
# * String receivers are copied into a brand new String.
# * Any other receiver yields +nil+.
def clean_annotations
  if Array === self
    self.annotated_array_clean_collect{|e| e.respond_to?(:clean_annotations)? e.clean_annotations : e}
  elsif String === self
    # Appending to an empty literal produces a fresh String object.
    "" << self
  end
end
|
39
|
+
|
40
|
+
# Merges a list of lists into a single list.
#
# The first element is used as the accumulator and every following element
# is appended to it with +concat+, so the first element is modified in
# place and is the object returned. Returns +nil+ for an empty receiver.
def consolidate
  self.inject(nil) do |merged, chunk|
    merged.nil? ? chunk : merged.concat(chunk)
  end
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# Defines a property (an instance method) on the module this is called on.
#
# +name+ is either a String/Symbol, or a single-pair Hash of the form
# <tt>name => type</tt>. +type+ selects how the generated method dispatches
# depending on whether the receiver is an Array:
#
# [:both]         the block is installed directly under +name+ (default
#                 when no type is given).
# [:array]        the block only runs on Array receivers; anything else
#                 raises.
# [:single]       the block only runs on non-Array receivers; Arrays raise.
# [:single2array] the block handles one element; an Array receiver collects
#                 the result of calling it on each of its elements.
# [:array2single] the block handles a whole Array; a non-Array receiver
#                 asks its +container+ (when that is an Array responding to
#                 the array method) or, failing that, +make_list+, and then
#                 picks its own entry out of the result (by key when the
#                 result is a Hash, by position otherwise).
#
# For the dispatching types the block itself is installed under
# "_ary_<name>" or "_single_<name>".
#
# Raises RuntimeError when +name+ is not a String, Symbol or single-pair
# Hash, and when +type+ is not one of the types listed above (previously an
# unknown type silently defined nothing).
def property(name, &block)
  case
  when (Hash === name and name.size == 1)
    name, type = name.first
  when (String === name or Symbol === name)
    type = :both
  else
    raise "Format of name ( => type) not understood: #{name.inspect}"
  end

  name = name.to_s unless String === name

  # Names under which the raw block is stored for the dispatching types.
  ary_name    = "_ary_" + name
  single_name = "_single_" + name

  case type
  when :both
    define_method name, &block
  when :array
    define_method ary_name, &block
    define_method name do |*args|
      raise "Method #{ name } only defined for array" unless Array === self
      self.send(ary_name, *args)
    end
  when :single
    define_method single_name, &block
    define_method name do |*args|
      raise "Method #{ name } not defined for array" if Array === self
      self.send(single_name, *args)
    end
  when :single2array
    define_method single_name, &block
    define_method name do |*args|
      if Array === self
        collect{|e| e.send(single_name, *args)}
      else
        self.send(single_name, *args)
      end
    end
  when :array2single
    define_method ary_name, &block
    define_method name do |*args|
      case
      when Array === self
        self.send(ary_name, *args)
      when (Array === self.container and self.container.respond_to? ary_name)
        # Compute once for the whole container and extract our own entry.
        res = self.container.send(ary_name, *args)
        if Hash === res
          res[self]
        else
          pos = self.container.index self
          res[pos]
        end
      else
        # No usable container: wrap ourselves in a one-element list.
        res = self.make_list.send(ary_name, *args)
        Hash === res ? res[self] : res[0]
      end
    end
  else
    raise "Property type not understood for #{name}: #{type.inspect}"
  end
end
|
32
121
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/entity/gene'
|
5
|
+
|
6
|
+
Workflow.require_workflow "Sequence"
|
7
|
+
|
8
|
+
# Entity for copy number variations, registered under the
# "Copy Number Variation" format and annotated with a job name and an
# organism.
module CNV
  extend Entity
  self.annotation :jobname
  self.annotation :organism

  self.format = "Copy Number Variation"

  # The last ":"-separated field of the entity string.
  property :variation => :single2array do
    fields = self.split(":")
    fields.last
  end

  # Per-entity match position of "loss" (case-insensitive) in the
  # variation field, or nil when it does not match. Memoized on the array.
  property :loss? => :array2single do
    @loss ||= self.variation.collect{|label| label =~ /loss/i}
  end

  # Per-entity match position of "gain" (case-insensitive) in the
  # variation field, or nil when it does not match. Memoized on the array.
  property :gain? => :array2single do
    @gain ||= self.variation.collect{|label| label =~ /gain/i}
  end

  # Genes reported by the Sequence workflow's :genes_at_genomic_ranges job
  # for each range, set up as "Ensembl Gene ID" Gene entities. Memoized.
  property :genes => :array2single do
    @genes ||= begin
                 by_range = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self).run
                 by_range.unnamed = true
                 per_entity = by_range.values_at(*self)
                 Gene.setup(per_entity, "Ensembl Gene ID", organism)
               end
  end

end
|
37
|
+
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -2,6 +2,8 @@ require 'rbbt/entity'
|
|
2
2
|
require 'rbbt/workflow'
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/sources/entrez'
|
5
|
+
require 'rbbt/entity/protein'
|
6
|
+
require 'rbbt/entity/pmid'
|
5
7
|
|
6
8
|
Workflow.require_workflow "Translation"
|
7
9
|
|
@@ -11,50 +13,96 @@ module Gene
|
|
11
13
|
self.annotation :format
|
12
14
|
self.annotation :organism
|
13
15
|
|
14
|
-
self.format = Organism::Hsa.identifiers.all_fields
|
16
|
+
self.format = Organism::Hsa.identifiers.all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
# Translates the genes into +new_format+ and returns the raw per-gene
# translation values set up as Gene entities in that format.
# Returns the receiver untouched when it is already in +new_format+.
property :to! => :array2single do |new_format|
  return self if format == new_format
  translation = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec
  Gene.setup(translation.values_at(*self), new_format, organism)
end
|
19
22
|
|
20
|
-
|
21
|
-
if
|
22
|
-
|
23
|
-
else
|
24
|
-
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
25
|
-
gene.nil? ? nil : gene.summary
|
26
|
-
end
|
23
|
+
# Like +to!+, but keeps only the first translation of each gene
# (nil where a gene has no translation). Returns the receiver untouched
# when it is already in +new_format+.
property :to => :array2single do |new_format|
  return self if format == new_format
  translated = to!(new_format)
  translated.collect!{|values| values.nil? ? nil : values.first}
end
|
28
27
|
|
29
|
-
|
30
|
-
|
31
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
|
32
|
-
else
|
33
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => [self], :format => new_format).exec[self], new_format, organism)
|
34
|
-
end
|
28
|
+
property :ensembl => :array2single do
|
29
|
+
@ensembl ||= to "Ensembl Gene ID"
|
35
30
|
end
|
36
31
|
|
37
|
-
|
38
|
-
|
39
|
-
if Array === self
|
40
|
-
to!(new_format).collect!{|v| v.nil? ? nil : v.first}
|
41
|
-
else
|
42
|
-
v = to!(new_format)
|
43
|
-
v.nil? ? nil : v.first
|
44
|
-
end
|
32
|
+
property :entrez => :array2single do
|
33
|
+
@entrez ||= to "Entrez Gene ID"
|
45
34
|
end
|
46
35
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
36
|
+
|
37
|
+
property :name => :array2single do
|
38
|
+
@name ||= to "Associated Gene Name"
|
39
|
+
end
|
40
|
+
|
41
|
+
# "Gene Start" position (cast with to_i) of each gene, taken from the
# organism's gene positions table.
#
# The result is memoized on the array, like the sibling properties, and the
# table is queried with the Ensembl Gene ID form of the genes (as +range+
# and +go_bp_terms+ do) so that genes held in another format still resolve.
# NOTE(review): assumes the gene positions table is keyed by Ensembl Gene
# ID, as the other position lookups in this module imply -- confirm.
property :chr_start => :array2single do
  @chr_start ||= begin
                   starts = Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"])
                   starts.values_at(*self.ensembl)
                 end
end
|
46
|
+
|
47
|
+
property :go_bp_terms => :array2single do
|
48
|
+
@go_bp_terms ||= Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
49
|
+
end
|
50
|
+
|
51
|
+
property :long_name => :single2array do
|
52
|
+
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
53
|
+
gene.nil? ? nil : gene.description.flatten.first
|
54
|
+
end
|
55
|
+
|
56
|
+
property :description => :single2array do
|
57
|
+
gene = Entrez.get_gene(to("Entrez Gene ID"))
|
58
|
+
gene.nil? ? nil : gene.summary.flatten.first
|
59
|
+
end
|
60
|
+
|
61
|
+
property :transcripts => :array2single do
|
62
|
+
gene_transcripts = Organism.gene_transcripts(organism).tsv :persist => true
|
63
|
+
gene_transcripts.unnamed = true
|
64
|
+
res = gene_transcripts.values_at(*self.ensembl)
|
65
|
+
res.each{|l| Transcript.setup(l, "Ensembl Transcript ID", organism)}
|
66
|
+
res
|
67
|
+
end
|
68
|
+
|
69
|
+
# For each gene, the list of proteins of its transcripts, set up as
# "Ensembl Protein ID" Protein entities. Memoized on the array.
#
# All transcripts are resolved to proteins in a single batch and the
# per-gene lists are then rebuilt from that transcript => protein map.
property :proteins => :array2single do
  @proteins ||= begin
                  transcripts = self.transcripts
                  all_transcripts = Transcript.setup(transcripts.flatten, "Ensembl Transcript ID", organism)

                  transcript2protein = Misc.process_to_hash(all_transcripts){|list|
                    list.protein
                  }

                  transcripts.collect{|list|
                    Protein.setup(transcript2protein.values_at(*list), "Ensembl Protein ID", organism)
                  }
                end
end
|
55
89
|
|
56
|
-
|
90
|
+
property :max_transcript_length => :array2single do
|
91
|
+
transcripts.collect{|list| list.sequence_length.compact.max}
|
92
|
+
end
|
93
|
+
|
94
|
+
# For each gene, the largest sequence length among its proteins (nil when
# none of them has a known length). Memoized on the array.
property :max_protein_length => :array2single do
  @max_protein_length ||= begin
                            proteins_by_gene = self.proteins
                            every_protein = Protein.setup(proteins_by_gene.flatten, "Ensembl Protein ID", organism)
                            # Lengths are computed once for the flattened list.
                            protein2length = Misc.process_to_hash(every_protein){|list| list.sequence_length}
                            proteins_by_gene.collect{|list| protein2length.values_at(*list).compact.max}
                          end
end
|
102
|
+
|
103
|
+
property :chromosome => :array2single do
|
57
104
|
chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
|
105
|
+
chr.unnamed = true
|
58
106
|
if Array === self
|
59
107
|
to("Ensembl Gene ID").collect do |gene|
|
60
108
|
chr[gene]
|
@@ -64,39 +112,63 @@ module Gene
|
|
64
112
|
end
|
65
113
|
end
|
66
114
|
|
67
|
-
|
115
|
+
# For each gene, a Range built from its "Gene Start" and "Gene End"
# positions, or nil when the gene is absent from the positions table.
property :range => :array2single do
  positions = Organism.gene_positions(organism).tsv(:fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i)
  to("Ensembl Gene ID").collect{|gene|
    positions.include?(gene) ? Range.new(*positions[gene]) : nil
  }
end
|
79
122
|
|
123
|
+
property :articles => :array2single do
|
124
|
+
@articles ||= begin
|
125
|
+
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat).values_at *self.entrez)
|
126
|
+
end
|
127
|
+
end
|
80
128
|
end
|
81
129
|
|
82
130
|
# Entity for transcripts, registered under the "Ensembl Transcript ID"
# format and annotated with a format and an organism.
module Transcript
  extend Entity

  self.annotation :format
  self.annotation :organism

  self.format = "Ensembl Transcript ID"

  # Raw translation of the transcripts into +new_format+ using the
  # Translation workflow's :tsv_probe_translate job. The values are set up
  # with Gene.setup; the receiver is returned when already in +new_format+.
  property :to! => :array2single do |new_format|
    return self if format == new_format
    translation = Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec
    Gene.setup(translation.values_at(*self), new_format, organism)
  end

  # Like +to!+ but keeps only the first translation of each transcript.
  property :to => :array2single do |new_format|
    return self if format == new_format
    translated = to!(new_format)
    translated.collect!{|values| values.nil? ? nil : values.first}
  end

  # The transcripts as Ensembl Transcript IDs.
  def ensembl
    to "Ensembl Transcript ID"
  end


  # Sequence of each transcript from the organism's transcript sequence
  # table.
  property :sequence => :array2single do
    sequences = Organism.transcript_sequence(organism).tsv :persist => true
    sequences.unnamed = true
    sequences.values_at(*self.ensembl)
  end

  # Length of each transcript sequence, nil where the sequence is unknown.
  property :sequence_length => :array2single do
    sequence.collect{|seq|
      seq.nil? ? nil : seq.length
    }
  end

  # Protein of each transcript, set up as "Ensembl Protein ID" entities.
  property :protein => :array2single do
    transcript2protein = Organism.transcripts(organism).tsv :single, :persist => true, :fields => ["Ensembl Protein ID"]
    transcript2protein.unnamed = true

    proteins = transcript2protein.values_at(*self.ensembl)
    Protein.setup(proteins, "Ensembl Protein ID", organism)
    proteins
  end
end
|
174
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/mutation/mutation_assessor'
|
5
|
+
require 'rbbt/entity/protein'
|
6
|
+
require 'rbbt/entity/gene'
|
7
|
+
require 'rbbt/entity/mutated_isoform'
|
8
|
+
|
9
|
+
Workflow.require_workflow "Sequence"
|
10
|
+
|
11
|
+
# Entity for genomic mutations, registered under the "Genomic Mutation"
# format. Entities are ":"-separated strings; the second field is read as
# the position and the fourth as a score. Annotated with a job name, an
# organism and a +watson+ flag that is forwarded to the Sequence workflow.
module GenomicMutation
  extend Entity
  self.annotation :jobname
  self.annotation :organism
  self.annotation :watson

  self.format = "Genomic Mutation"

  # Fourth ":"-separated field as a Float.
  property :score => :single2array do
    self.split(":")[3].to_f
  end


  # Second ":"-separated field as an Integer.
  property :position => :single2array do
    self.split(":")[1].to_i
  end

  # For each mutation, a list of "gene:offset" strings giving the distance
  # from the start of every overlapping gene with a known start.
  property :offset_in_genes => :array2single do
    gene2chr_start = Misc.process_to_hash(genes.flatten){|list| list.chr_start}
    position.zip(genes).collect do |mutation_position, overlapping|
      offsets = overlapping.collect do |gene|
        next if not gene2chr_start.include? gene
        [gene, mutation_position.to_i - gene2chr_start[gene]] * ":"
      end
      offsets.compact
    end
  end

  # Genes at each mutation position, as "Ensembl Gene ID" Gene entities.
  # Memoized on the array.
  property :genes => :array2single do
    @genes ||= begin
                 by_position = Sequence.job(:genes_at_genomic_positions, jobname, :organism => organism, :positions => self).run
                 by_position.unnamed = true
                 per_mutation = by_position.values_at(*self)
                 Gene.setup(per_mutation, "Ensembl Gene ID", organism)
               end
  end

  # Mutated isoforms of each mutation as reported by the Sequence workflow;
  # every resulting list gets the organism assigned. Memoized.
  property :mutated_isoforms => :array2single do
    @mutated_isoforms ||= begin
                            per_mutation = Sequence.job(:mutated_isoforms_for_genomic_mutations, jobname, :watson => watson, :organism => organism, :mutations => self).run.values_at(*self)
                            per_mutation.each{|list| list.organism = organism}
                            per_mutation
                          end
  end

  # Exon junctions at each position, as reported by the Sequence workflow.
  property :exon_junctions do
    @exon_junctions ||= Sequence.job(:exon_junctions_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at(*self)
  end

  # True for mutations with a non-empty list of exon junctions.
  property :in_exon_junction? => :array2single do
    exon_junctions.collect{|junctions| not junctions.nil? and not junctions.empty?}
  end

  # Whether each mutation overlaps +gene+. Cached per gene.
  property :over_gene? => :array2single do |gene|
    @over_genes ||= {}
    @over_genes[gene] ||= genes.clean_annotations.collect{|list| list.include? gene}
  end

  # Mutation Assessor scores of the mutated isoforms of each mutation.
  # Memoized.
  property :mutation_assessor_scores => :array2single do
    @mutation_assessor_scores ||= begin
                                    per_mutation = self.mutated_isoforms
                                    all_mutated_isoforms = MutatedIsoform.setup(per_mutation.flatten.compact, organism)
                                    isoform2score = Misc.process_to_hash(all_mutated_isoforms){|list| all_mutated_isoforms.mutation_assessor_scores}

                                    MutatedIsoform.setup(per_mutation.collect{|list| list.nil? ? [] : isoform2score.values_at(*list)}, organism)
                                  end
  end

  # Truncation flags of the mutated isoforms of each mutation. Memoized.
  property :truncated do
    @truncated ||= begin
                     per_mutation = self.mutated_isoforms
                     all_mutated_isoforms = MutatedIsoform.setup(per_mutation.flatten.compact, organism)
                     isoform2truncated = Misc.process_to_hash(all_mutated_isoforms){|list| all_mutated_isoforms.truncated}
                     per_mutation.collect{|list| list.nil? ? [] : isoform2truncated.values_at(*list)}
                   end
  end

  # Exons at each position, as reported by the Sequence workflow. Memoized.
  property :affected_exons => :array2single do
    @affected_exons ||= begin
                          Sequence.job(:exons_at_genomic_positions, jobname, :organism => organism, :positions => self).run.values_at(*self)
                        end
  end

end
|
data/lib/rbbt/entity/genotype.rb
CHANGED
@@ -1,186 +1,162 @@
|
|
1
|
-
require 'rbbt/entity'
|
2
1
|
require 'rbbt/workflow'
|
3
|
-
require 'rbbt/
|
4
|
-
require 'rbbt/
|
5
|
-
require 'rbbt/entity/protein'
|
6
|
-
|
7
|
-
Workflow.require_workflow "Sequence"
|
8
|
-
|
9
|
-
module MutatedIsoform
|
10
|
-
extend Entity
|
11
|
-
self.annotation :organism
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/entity/genomic_mutation'
|
12
4
|
|
13
|
-
|
5
|
+
module Genotype
|
6
|
+
extend Workflow
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
else
|
19
|
-
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
8
|
+
if self.respond_to? :extended
|
9
|
+
class << self
|
10
|
+
alias prev_genotype_extended extended
|
20
11
|
end
|
21
12
|
end
|
22
13
|
|
23
|
-
def
|
24
|
-
if
|
25
|
-
|
26
|
-
|
27
|
-
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
28
|
-
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
14
|
+
def self.extended(base)
|
15
|
+
prev_genotype_extended(base) if self.respond_to? :prev_genotype_extended
|
16
|
+
base.helper :genotype do
|
17
|
+
base
|
29
18
|
end
|
30
19
|
end
|
31
20
|
|
32
|
-
|
33
|
-
|
34
|
-
prot, change = self.split(":")
|
35
|
-
|
36
|
-
case
|
37
|
-
when change =~ /UTR/
|
38
|
-
"UTR"
|
39
|
-
when (change[0] == ASTERISK and not change[0] == change[-1])
|
40
|
-
"NOSTOP"
|
41
|
-
when (change[-1] == ASTERISK and not change[0] == change[-1])
|
42
|
-
"NONSENSE"
|
43
|
-
when change =~ /Indel/
|
44
|
-
"INDEL"
|
45
|
-
when change =~ /FrameShift/
|
46
|
-
"FRAMESHIFT"
|
47
|
-
when change[0] == change[-1]
|
48
|
-
"SYNONYMOUS"
|
49
|
-
else
|
50
|
-
"MISS-SENSE"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def ary_type
|
55
|
-
self.collect{|mutation| mutation.single_type}
|
56
|
-
end
|
57
|
-
|
21
|
+
module Cohort
|
22
|
+
extend Workflow
|
58
23
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def filter(*types)
|
64
|
-
list = self.zip(type).select do |mutation, type|
|
65
|
-
types.include? type
|
66
|
-
end.collect{|mutation, type| mutation}
|
67
|
-
|
68
|
-
MutatedIsoform.setup(list, organism)
|
69
|
-
end
|
70
|
-
|
71
|
-
def self2mutation_assessor_prediction
|
72
|
-
if Array === self
|
73
|
-
filtered = filter "MISS-SENSE"
|
74
|
-
correspondance = {}
|
75
|
-
mutations = filtered.zip(filtered.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
76
|
-
prot, change = mutation.split(":")
|
77
|
-
next if uniprot.nil?
|
78
|
-
uniprot_change = [uniprot, change]
|
79
|
-
correspondance[uniprot_change] = mutation
|
80
|
-
uniprot_change
|
81
|
-
end.compact
|
82
|
-
|
83
|
-
tsv = MutationAssessor.chunked_predict(mutations)
|
84
|
-
return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if tsv.empty?
|
85
|
-
tsv.add_field "Mutated Isoform" do |key, values|
|
86
|
-
correspondance[key.split(" ")]
|
24
|
+
if self.respond_to? :extended
|
25
|
+
class << self
|
26
|
+
alias prev_genotype_cohort_extended extended
|
87
27
|
end
|
88
|
-
tsv.reorder "Mutated Isoform", ["Func. Impact"]
|
89
|
-
else
|
90
|
-
prot, change = mutation.split(":")
|
91
|
-
uniprot = protein.to "UniProt/SwissProt ID"
|
92
|
-
mutations = [uniprot, change]
|
93
|
-
|
94
|
-
tsv = MutationAssessor.chunked_predict(mutations)
|
95
|
-
tsv.add_field "Mutated Isoform" do |key, values|
|
96
|
-
self
|
97
|
-
end
|
98
|
-
tsv.reorder "Mutated Isoform", ["Func. Impact"]
|
99
28
|
end
|
100
|
-
end
|
101
29
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
30
|
+
def self.extended(base)
|
31
|
+
prev_genotype_cohort_extended(base) if self.respond_to? :prev_genotype_cohort_extended
|
32
|
+
|
33
|
+
class << base
|
34
|
+
attr_accessor :metagenotype
|
35
|
+
|
36
|
+
def jobname
|
37
|
+
if @jobname.nil?
|
38
|
+
@jobname ||= "Meta-genotype: " + self.collect{|g| g.jobname} * ", "
|
39
|
+
@jobname[100..-1] = " (etc; #{self.length} genotypes)" if @jobname.length > 100
|
40
|
+
end
|
41
|
+
@jobname
|
42
|
+
end
|
43
|
+
|
44
|
+
def metagenotype
|
45
|
+
if @metagenotype.nil?
|
46
|
+
@metagenotype = GenomicMutation.setup(self.dup.flatten, jobname, self[0].organism, self[0].watson)
|
47
|
+
@metagenotype.extend Genotype unless Genotype === @metagenotype
|
48
|
+
end
|
49
|
+
@metagenotype
|
50
|
+
end
|
51
|
+
end unless base.respond_to? :metagenotype
|
52
|
+
|
53
|
+
base.each do |genotype| genotype.extend Genotype unless Genotype === genotype end
|
54
|
+
|
55
|
+
base.helper :metagenotype do
|
56
|
+
base.metagenotype
|
110
57
|
end
|
111
|
-
}
|
112
|
-
end
|
113
58
|
|
114
|
-
|
115
|
-
|
116
|
-
filter("FRAMESHIFT").select{|isoform_mutation|
|
117
|
-
protein, mutation = isoform_mutation.split ":"
|
118
|
-
if protein_sequences.include? protein
|
119
|
-
mutation.match(/(\d+)/)[1].to_f < protein_sequences[protein].length.to_f * 0.7
|
120
|
-
else
|
121
|
-
false
|
59
|
+
base.helper :samples do
|
60
|
+
base
|
122
61
|
end
|
123
|
-
}
|
124
|
-
end
|
125
62
|
|
126
|
-
|
127
|
-
|
63
|
+
NamedArray.setup(base, base.collect{|base| base.jobname})
|
64
|
+
end
|
65
|
+
|
66
|
+
returns "Ensembl Gene ID"
|
67
|
+
task :all_affected_genes => :array do
|
68
|
+
set_info :organism, metagenotype.organism
|
69
|
+
samples.collect{|genotype| genotype.all_affected_genes}.flatten.uniq
|
70
|
+
end
|
128
71
|
|
129
|
-
|
130
|
-
|
72
|
+
returns "Ensembl Gene ID"
|
73
|
+
task :damaged_genes => :array do
|
74
|
+
set_info :organism, metagenotype.organism
|
75
|
+
samples.collect{|genotype| genotype.damaged_genes}.flatten.uniq
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
returns "Ensembl Gene ID"
|
80
|
+
task :recurrent_genes => :array do
|
81
|
+
set_info :organism, metagenotype.organism
|
82
|
+
count = Hash.new(0)
|
83
|
+
samples.each do |genotype| genotype.genes.flatten.uniq.each{|gene| count[gene] += 1} end
|
84
|
+
count.select{|gene, c| c > 1}.collect{|gene,c| gene.dup}
|
85
|
+
end
|
131
86
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
value = levels.index(v[0].to_s)
|
137
|
-
value and value >= cutoff
|
87
|
+
%w(damaged_genes recurrent_genes all_affected_genes).each do |name|
|
88
|
+
define_method name do |*args|
|
89
|
+
@cache ||= {}
|
90
|
+
@cache[[name, args]] ||= self.job(name, self.jobname).run
|
138
91
|
end
|
139
|
-
|
92
|
+
end
|
140
93
|
|
141
|
-
|
142
|
-
predicted += early_frameshifts if options[:frameshift]
|
94
|
+
end
|
143
95
|
|
144
|
-
|
96
|
+
returns "Ensembl Gene ID"
|
97
|
+
task :all_affected_genes => :array do
|
98
|
+
set_info :organism, genotype.organism
|
99
|
+
genotype.genes.clean_annotations.flatten.uniq
|
145
100
|
end
|
146
|
-
end
|
147
101
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
102
|
+
# Affected genes whose longest protein exceeds 1000 residues, or whose
# name starts with "PCDH". Depends on the :all_affected_genes step.
dep :all_affected_genes
returns "Ensembl Gene ID"
task :long_genes => :array do
  candidates = step(:all_affected_genes).load
  selected = candidates.select do |gene|
    protein_length = gene.max_protein_length
    protein_length and protein_length > 1000 or gene.name =~ /^PCDH/
  end

  set_info :organism, genotype.organism
  selected.clean_annotations
end
|
154
114
|
|
155
|
-
|
156
|
-
|
115
|
+
returns "Ensembl Gene ID"
|
116
|
+
task :mutations_in_exon_junctions => :array do
|
117
|
+
set_info :organism, genotype.organism
|
118
|
+
genotype.select{|mutation| mutation.in_exon_junction?}.clean_annotations
|
157
119
|
end
|
158
120
|
|
159
|
-
|
160
|
-
|
121
|
+
returns "Ensembl Gene ID"
|
122
|
+
input :threshold, :float, "from 0 to 1", 0.5
|
123
|
+
task :with_damaged_isoforms => :array do |threshold|
|
124
|
+
set_info :organism, genotype.organism
|
125
|
+
mutated_isoform_damage = Misc.process_to_hash(genotype.mutated_isoforms.flatten.compact){|list| MutatedIsoform.setup(list, genotype.organism).damage_scores}
|
126
|
+
genotype.select{|mutation| if mutation.mutated_isoforms then mutated_isoform_damage.values_at(*mutation.mutated_isoforms.flatten.compact).select{|score| not score.nil? and score > threshold}.any? else false; end}.genes.flatten.uniq.clean_annotations
|
161
127
|
end
|
162
128
|
|
163
|
-
|
164
|
-
|
129
|
+
# Genes with at least one mutated isoform flagged as truncated.
#
# The mutated isoforms are set up with the genotype's own organism (the
# same value recorded with set_info and used by the sibling tasks) rather
# than a fixed organism build, so genotypes from other builds resolve
# their proteins and genes against the right annotation.
returns "Ensembl Gene ID"
task :truncated => :array do
  set_info :organism, genotype.organism
  MutatedIsoform.setup(genotype.mutated_isoforms.flatten.compact, genotype.organism).
    select{|isoform_mutation| isoform_mutation.truncated }.
    protein.gene.to("Ensembl Gene ID").uniq.clean_annotations
end
|
166
136
|
|
167
|
-
|
168
|
-
|
137
|
+
returns "Ensembl Gene ID"
|
138
|
+
task :affected_exon_junctions => :array do
|
139
|
+
set_info :organism, genotype.organism
|
140
|
+
genotype.select{|mutation| mutation.in_exon_junction?}.genes.flatten.clean_annotations
|
169
141
|
end
|
170
142
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
143
|
+
# Union (without duplicates) of the genes reported by the
# :with_damaged_isoforms, :truncated and :affected_exon_junctions steps.
dep :with_damaged_isoforms, :truncated, :affected_exon_junctions
returns "Ensembl Gene ID"
task :damaged_genes => :array do
  set_info :organism, genotype.organism

  from_damaged_isoforms = step(:with_damaged_isoforms).load.clean_annotations
  from_truncations      = step(:truncated).load.clean_annotations
  from_exon_junctions   = step(:affected_exon_junctions).load.clean_annotations

  combined = from_damaged_isoforms + from_truncations + from_exon_junctions
  combined.uniq
end
|
179
154
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
155
|
+
%w(all_affected_genes damaged_genes truncated with_damaged_isoforms affected_exon_junctions long_genes recurrent_genes).each do |name|
|
156
|
+
define_method name do |*args|
|
157
|
+
@cache ||= {}
|
158
|
+
@cache[[name, args]] ||= self.job(name, self.jobname).run
|
159
|
+
end
|
185
160
|
end
|
186
161
|
end
|
162
|
+
|
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/mutation/mutation_assessor'
|
5
|
+
require 'rbbt/mutation/sift'
|
6
|
+
require 'rbbt/entity/protein'
|
7
|
+
require 'rbbt/entity/gene'
|
8
|
+
require 'nokogiri'
|
9
|
+
|
10
|
+
module MutatedIsoform
|
11
|
+
extend Entity
|
12
|
+
self.annotation :organism
|
13
|
+
|
14
|
+
self.format = "Mutated Isoform"
|
15
|
+
|
16
|
+
property :protein do
|
17
|
+
if Array === self
|
18
|
+
Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
|
19
|
+
else
|
20
|
+
Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
property :change => :single2array do
|
25
|
+
self.split(":").last
|
26
|
+
end
|
27
|
+
|
28
|
+
property :position => :single2array do
|
29
|
+
if change.match(/[^\d](\d+)[^\d]/)
|
30
|
+
$1.to_i
|
31
|
+
else
|
32
|
+
nil
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
property :ensembl_protein_image_url => :single2array do
|
37
|
+
ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
|
38
|
+
"http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
|
39
|
+
end
|
40
|
+
|
41
|
+
# Downloads the Ensembl protein image (SVG) for the isoform's protein and
# overlays a thin, semi-transparent red bar at the mutated position.
#
# The horizontal offset is interpolated between the x coordinate of the
# first 'rect.ac' element and the image width, proportionally to the
# mutation position within the protein sequence length.
#
# Returns the SVG text unchanged when the <svg> or 'rect.ac' element, the
# sequence length or the position are missing. The inserted <rect/> is
# self-closed so the resulting document stays well-formed XML.
property :marked_svg => :single2array do
  svg = Open.read(protein.ensembl_protein_image_url)
  seq_len = protein.sequence_length
  position = self.position

  doc = Nokogiri::XML(svg)
  svg_node = doc.css('svg').first
  track = doc.css('rect.ac').first

  if svg_node and track and seq_len and position
    width  = svg_node.attr('width').to_f
    height = svg_node.attr('height').to_f
    start  = track.attr('x').to_f

    offset = (width - start)/seq_len * position + start
    svg.sub(/<\/svg>/,"<rect x='#{offset}' y='1' width='1' height='#{height}' style='fill:rgb(255,0,0);opacity:0.5;stroke:none;'/></svg>")
  else
    svg
  end
end
|
59
|
+
|
60
|
+
ASTERISK = "*"[0]
|
61
|
+
CONSECUENCES = %w(UTR SYNONYMOUS NOSTOP MISS-SENSE INDEL FRAMESHIFT NONSENSE)
|
62
|
+
property :consecuence => :single2array do
|
63
|
+
prot, change = self.split(":")
|
64
|
+
|
65
|
+
case
|
66
|
+
when change =~ /UTR/
|
67
|
+
"UTR"
|
68
|
+
when (change[0] == ASTERISK and not change[0] == change[-1])
|
69
|
+
"NOSTOP"
|
70
|
+
when (change[-1] == ASTERISK and not change[0] == change[-1])
|
71
|
+
"NONSENSE"
|
72
|
+
when change =~ /Indel/
|
73
|
+
"INDEL"
|
74
|
+
when change =~ /FrameShift/
|
75
|
+
"FRAMESHIFT"
|
76
|
+
when change[0] == change[-1]
|
77
|
+
"SYNONYMOUS"
|
78
|
+
else
|
79
|
+
"MISS-SENSE"
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
property :truncated => :array2single do
|
84
|
+
@truncated ||= begin
|
85
|
+
protein2sequence_length = Misc.process_to_hash(self.protein.flatten){|list| list.sequence_length}
|
86
|
+
self.collect do |isoform_mutation|
|
87
|
+
|
88
|
+
next if isoform_mutation.consecuence != "FRAMESHIFT" and isoform_mutation.consecuence != "NONSENSE"
|
89
|
+
protein = isoform_mutation.protein
|
90
|
+
position = isoform_mutation.position
|
91
|
+
sequence_length = protein2sequence_length[protein]
|
92
|
+
|
93
|
+
case
|
94
|
+
when (sequence_length.nil? or position.nil?)
|
95
|
+
nil
|
96
|
+
when position < sequence_length.to_f * 0.7
|
97
|
+
true
|
98
|
+
else
|
99
|
+
false
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
# For each mutated isoform, the mean of its available SIFT and Mutation
# Assessor scores, or nil when neither is available. Memoized on the array.
property :damage_scores => :array2single do
  @damage_scores ||= begin
                       sift_scores.zip(mutation_assessor_scores).collect do |pair|
                         available = pair.compact
                         if available.empty?
                           nil
                         else
                           total = available.inject(0.0){|sum, score| sum + score}
                           total / available.length
                         end
                       end
                     end
end
|
118
|
+
|
119
|
+
property :sift_scores => :array2single do
|
120
|
+
@sift_scores ||= begin
|
121
|
+
missense = self.select{|iso_mut| iso_mut.consecuence == "MISS-SENSE"}
|
122
|
+
|
123
|
+
values = SIFT.chunked_predict(missense).values_at(*self).collect{|v|
|
124
|
+
v.nil? ? nil : v["Prediction"]
|
125
|
+
}
|
126
|
+
|
127
|
+
range = {nil => nil,
|
128
|
+
"" => nil,
|
129
|
+
"TOLERATED" => 0,
|
130
|
+
"*DAMAGING" => 1,
|
131
|
+
"DAMAGING" => 1}
|
132
|
+
|
133
|
+
range.values_at *values
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
property :mutation_assessor_scores => :array2single do
|
138
|
+
@mutation_assesor_scores ||= begin
|
139
|
+
missense = self.select{|mutation| mutation.consecuence == "MISS-SENSE"}
|
140
|
+
|
141
|
+
correspondance = {}
|
142
|
+
list = missense.zip(missense.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
|
143
|
+
prot, change = mutation.split(":")
|
144
|
+
next if uniprot.nil?
|
145
|
+
uniprot_change = [uniprot, change]
|
146
|
+
correspondance[uniprot_change] ||= []
|
147
|
+
correspondance[uniprot_change] << mutation
|
148
|
+
uniprot_change
|
149
|
+
end.compact
|
150
|
+
|
151
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if list.empty?
|
152
|
+
return [nil] * self.length if list.empty?
|
153
|
+
|
154
|
+
tsv = MutationAssessor.chunked_predict(list.sort_by{|p| p * "_"})
|
155
|
+
|
156
|
+
#return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list) if tsv.nil? or tsv.empty?
|
157
|
+
return [nil] * self.length if tsv.empty?
|
158
|
+
|
159
|
+
new = TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"], :type => :list)
|
160
|
+
|
161
|
+
tsv.each do |key, values|
|
162
|
+
correspondance[key.split(" ")].each do |mutation|
|
163
|
+
new[mutation] = values["Func. Impact"]
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
range = {nil => nil,
|
169
|
+
"" => nil,
|
170
|
+
"neutral" => 0,
|
171
|
+
"low" => 0.3,
|
172
|
+
"medium" => 0.6,
|
173
|
+
"high" => 1}
|
174
|
+
|
175
|
+
range.values_at *new.values_at(*self)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/sources/pubmed'
|
3
|
+
|
4
|
+
# Entity module for identifiers in the "PMID" format.
module PMID
  extend Entity

  self.format = "PMID"

  # Title of each article as returned by PubMed.get_article, in the same
  # order as the identifiers; nil where no article was found. Memoized.
  property :title => :array2single do
    @title ||= begin
      articles = PubMed.get_article(self)
      articles.values_at(*self).collect do |entry|
        entry.title unless entry.nil?
      end
    end
  end

  # HTML anchor pointing at the NCBI PubMed page for this identifier.
  property :pubmed_url => :single2array do
    "<a class='pmid' href='http://www.ncbi.nlm.nih.gov/pubmed/#{self}'>#{ self }</a>"
  end
end
|
19
|
+
|
data/lib/rbbt/entity/protein.rb
CHANGED
@@ -3,6 +3,7 @@ require 'rbbt/workflow'
|
|
3
3
|
require 'rbbt/sources/organism'
|
4
4
|
require 'rbbt/statistics/hypergeometric'
|
5
5
|
require 'rbbt/network/paths'
|
6
|
+
require 'rbbt/entity/gene'
|
6
7
|
|
7
8
|
Workflow.require_workflow "Translation"
|
8
9
|
|
@@ -14,17 +15,48 @@ module Protein
|
|
14
15
|
self.annotation :format
|
15
16
|
self.annotation :organism
|
16
17
|
|
17
|
-
|
18
|
-
|
18
|
+
self.format = "Ensembl Protein ID"
|
19
|
+
|
20
|
+
# Shorthand for translating this entity to the "Ensembl Protein ID" format.
def ensembl
  to("Ensembl Protein ID")
end
|
23
|
+
|
24
|
+
# URL of the Ensembl TranslationImage (SVG export) for this protein.
# The main Ensembl host is used when organism is exactly "Hsa"; otherwise
# the part of organism after the last "/" selects an archive host.
property :ensembl_protein_image_url => :single2array do
  host = organism == "Hsa" ? "www.ensembl.org" : "#{organism.sub(/.*\//,'')}.archive.ensembl.org"
  "http://#{host}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{ensembl};_rmd=d2a8;export=svg"
end
|
20
28
|
|
21
|
-
|
29
|
+
# Translates the proteins to new_format by executing the :translate_protein
# task of the Translation workflow, and sets the result up as Protein with
# the new format and the same organism. Returns self untouched when the
# entity is already in new_format.
property :to! => :array2single do |new_format|
  return self if format == new_format

  translation = Translation.job(:translate_protein, "",
                                :organism => organism,
                                :proteins => self,
                                :format => new_format)
  Protein.setup(translation.exec, new_format, organism)
end
|
33
|
+
|
34
|
+
# Like to!, but each translated entry is collapsed in place to its first
# element (nil entries stay nil). Returns self when already in new_format.
property :to => :array2single do |new_format|
  return self if format == new_format

  translated = to!(new_format)
  translated.collect! do |matches|
    matches.first unless matches.nil?
  end
end
|
38
|
+
|
39
|
+
# Sets up a Gene entity from this protein's Ensembl Protein ID(s), after
# stripping the Protein annotations, keeping the same organism.
property :gene do
  protein_ids = to("Ensembl Protein ID").clean_annotations
  Gene.setup(protein_ids, "Ensembl Protein ID", organism)
end
|
42
|
+
|
43
|
+
# Flattened list of the values found for these identifiers in the
# Organism.gene_pfam index, set up as Pfam entities.
property :pfam => :array2single do
  pfam_index = Organism.gene_pfam(organism).tsv(:flat, :persist => true)
  pfam_index.unnamed = true

  domains = pfam_index.values_at(*self).flatten
  Pfam.setup(domains)
end
|
49
|
+
|
50
|
+
# Sequence for each protein, looked up by Ensembl Protein ID in the
# Organism.protein_sequence table. Memoized in @protein_sequence.
property :sequence => :array2single do
  @protein_sequence ||= begin
    sequences = Organism.protein_sequence(organism).tsv(:persist => true)
    sequences.unnamed = true
    sequences.values_at(*self.ensembl)
  end
end
|
57
|
+
|
58
|
+
# Length of each protein sequence; nil where the sequence is missing.
property :sequence_length => :array2single do
  sequence.collect do |residues|
    residues.length unless residues.nil?
  end
end
|
29
61
|
end
|
30
62
|
|
@@ -6,9 +6,49 @@ require 'test/unit'
|
|
6
6
|
require 'rbbt/entity/gene'
|
7
7
|
|
8
8
|
# Tests for the Gene entity, using human genes given by name.
class TestGene < Test::Unit::TestCase
  CDK5 = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
  TP53 = Gene.setup("TP53", "Associated Gene Name", "Hsa")
  TWO = Gene.setup(["CDK5", "TP53"], "Associated Gene Name", "Hsa")

  # Translation between identifier formats.
  def test_to
    cdk5 = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
    assert_equal("1020", cdk5.to("Entrez Gene ID"))
  end

  # long_name for a single gene and for a list; description for TP53.
  def test_long_name
    single = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
    assert_equal("cyclin-dependent kinase 5", single.long_name)

    list = Gene.setup(["CDK5"], "Associated Gene Name", "Hsa")
    assert_equal(["cyclin-dependent kinase 5"], list.long_name)

    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa")
    assert_match(/tumor/, tp53.description)
  end

  # Transcripts keep the organism of the gene they come from.
  def test_transcripts
    assert(CDK5.transcripts.length > 1)
    assert_equal("Hsa", CDK5.transcripts.organism)
    assert_equal("Hsa", CDK5.make_list.transcripts.first.organism)
    assert_equal("Hsa", CDK5.transcripts.make_list.organism)
  end

  def test_proteins
    assert(CDK5.proteins.length > 1)
  end

  # A single gene yields one value, a list yields an Array.
  def test_max_protein_length
    assert(CDK5.max_protein_length > 200)
    assert(Array === TWO.max_protein_length)
    assert(TWO.max_protein_length.first > 200)
  end

  def test_max_transcript_length
    assert(CDK5.max_transcript_length > 200)
    assert(Array === TWO.max_transcript_length)
    assert(TWO.max_transcript_length.first > 200)
  end

  def test_range
    assert(Range === CDK5.range)
    assert(Range === CDK5.make_list.range.first)
  end
end
|
13
53
|
|
14
54
|
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/genomic_mutation'
|
7
|
+
|
8
|
+
# Tests for the GenomicMutation entity on the Hsa/jun2011 build.
class TestGenomicMutation < Test::Unit::TestCase
  MUTATION = GenomicMutation.setup("10:124745844:A:158", "Test", "Hsa/jun2011")
  SPLICING = GenomicMutation.setup("18:14787040:A", "Test", "Hsa/jun2011")
  GENOTYPE = GenomicMutation.setup(Rbbt.data.genotype.list, "Test", "Hsa/jun2011")

  def test_genes
    assert GENOTYPE.genes.flatten.to("Associated Gene Name").include?("PSTK")
  end

  def test_consolidate
    assert GENOTYPE.genes.consolidate.to("Associated Gene Name").include?("PSTK")
  end

  def test_mutated_isoforms
    assert MUTATION.mutated_isoforms.length > 1
    # This was `assert ["PSTK"], ...`, which treats the expected array as the
    # (always truthy) value under test and the actual value as the failure
    # message, so it could never fail. Compare the two instead.
    assert_equal ["PSTK"], MUTATION.mutated_isoforms.protein.gene.to("Associated Gene Name").uniq
  end

  def test_exon_junction
    assert(!(MUTATION.in_exon_junction?))
    assert SPLICING.in_exon_junction?
  end

  def test_over_gene
    pstk = Gene.setup("PSTK", "Associated Gene Name", "Hsa/jun2011").ensembl
    assert MUTATION.over_gene?(pstk)

    pstk = Gene.setup("PSTK", "Associated Gene Name", "Hsa/jun2011").ensembl
    assert(!(SPLICING.over_gene?(pstk)))
  end
end
|
37
|
+
|
38
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/mutated_isoform'
|
7
|
+
|
8
|
+
# Tests for the MutatedIsoform entity on the Hsa/jun2011 build.
class TestMutatedIsoform < Test::Unit::TestCase
  MUTATION = MutatedIsoform.setup("ENSP00000275493:G719A", "Hsa/jun2011")

  def test_protein
    assert_equal("EGFR", MUTATION.protein.gene.to("Associated Gene Name"))
  end

  # A stop ("*") half way through the protein is reported as truncated;
  # one at 90% of its length is not.
  def test_truncated
    gene = Gene.setup("CDK5", "Associated Gene Name", "Hsa/jun2011")
    protein = Protein.setup(gene.to("Ensembl Protein ID"), "Ensembl Protein ID", "Hsa/jun2011")

    assert(stop_mutation_at(protein, 0.5).truncated)
    assert(!stop_mutation_at(protein, 0.9).truncated)
  end

  private

  # Builds the MutatedIsoform "<protein>:<wildtype><position>*", where
  # position is the given fraction of the protein's sequence length.
  def stop_mutation_at(protein, fraction)
    position = (protein.sequence_length.to_f * fraction).to_i
    wildtype = protein.sequence[(position..position)]
    change = wildtype + position.to_s + "*"
    MutatedIsoform.setup([protein, change] * ":", "Hsa/jun2011")
  end
end
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
require 'rbbt/entity/protein'
|
7
|
+
|
8
|
+
# Tests for the Protein entity on the Hsa/jun2011 build.
class TestProtein < Test::Unit::TestCase
  # Protein declares its annotations as :format then :organism, so the
  # format has to be given explicitly; passing only "Hsa/jun2011" would
  # store the organism code as the format and leave the organism unset.
  PROTEIN = Protein.setup("ENSP00000275493", "Ensembl Protein ID", "Hsa/jun2011")
  PROTEIN_ARRAY = Protein.setup(["ENSP00000275493"], "Ensembl Protein ID", "Hsa/jun2011")

  # clean_annotations must strip the entity behaviour; gene must return a
  # Gene that is no longer a Protein.
  def test_clean_annotations
    assert Protein === PROTEIN
    assert(!(Protein === PROTEIN.clean_annotations))
    assert Gene === PROTEIN.gene
    assert(!(Protein === PROTEIN.gene))
    assert(PROTEIN_ARRAY.respond_to?(:annotated_array_clean_each))
    assert(!(PROTEIN_ARRAY.clean_annotations.respond_to?(:annotated_array_clean_each)))
  end

  def test_gene
    assert_equal "EGFR", PROTEIN.gene.to("Associated Gene Name")
  end
end
|
26
|
+
|
27
|
+
|
File without changes
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-entities
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
|
-
- 0
|
8
7
|
- 1
|
9
8
|
- 0
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-11-17 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -71,12 +71,20 @@ extra_rdoc_files:
|
|
71
71
|
files:
|
72
72
|
- LICENSE
|
73
73
|
- lib/rbbt/entity.rb
|
74
|
+
- lib/rbbt/entity/cnv.rb
|
74
75
|
- lib/rbbt/entity/gene.rb
|
76
|
+
- lib/rbbt/entity/genomic_mutation.rb
|
75
77
|
- lib/rbbt/entity/genotype.rb
|
76
78
|
- lib/rbbt/entity/misc.rb
|
79
|
+
- lib/rbbt/entity/mutated_isoform.rb
|
80
|
+
- lib/rbbt/entity/pmid.rb
|
77
81
|
- lib/rbbt/entity/protein.rb
|
78
82
|
- test/test_helper.rb
|
79
83
|
- test/rbbt/entity/test_gene.rb
|
84
|
+
- test/rbbt/entity/test_genomic_mutation.rb
|
85
|
+
- test/rbbt/entity/test_mutated_isoform.rb
|
86
|
+
- test/rbbt/entity/test_protein.rb
|
87
|
+
- test/rbbt/test_entity.rb
|
80
88
|
has_rdoc: true
|
81
89
|
homepage: http://github.com/mikisvaz/rbbt-util
|
82
90
|
licenses: []
|
@@ -114,3 +122,7 @@ summary: Entities for the Ruby Bioinformatics Toolkit (rbbt)
|
|
114
122
|
test_files:
|
115
123
|
- test/test_helper.rb
|
116
124
|
- test/rbbt/entity/test_gene.rb
|
125
|
+
- test/rbbt/entity/test_genomic_mutation.rb
|
126
|
+
- test/rbbt/entity/test_mutated_isoform.rb
|
127
|
+
- test/rbbt/entity/test_protein.rb
|
128
|
+
- test/rbbt/test_entity.rb
|