rbbt-entities 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,102 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/sources/entrez'
5
+
6
+ Workflow.require_workflow "Translation"
7
+
8
# Entity mixin for gene identifiers; works on a single identifier or on an
# array of them (every method branches on `Array === self`).
#
# Annotations: +format+ (the identifier type of the receiver) and +organism+.
# The module registers itself in Entity for every field reported by
# Organism::Hsa.identifiers.all_fields.
module Gene
  extend Entity

  self.annotation :format
  self.annotation :organism

  self.format = Organism::Hsa.identifiers.all_fields

  # Returns the receiver translated to the "Associated Gene Name" format.
  def name
    to "Associated Gene Name"
  end

  # Returns the +summary+ of the record Entrez.get_gene yields for the
  # receiver's "Entrez Gene ID" translation (an array of summaries for array
  # receivers). Entries for which get_gene returns nil stay nil.
  def description
    if Array === self
      to("Entrez Gene ID").collect do |id|
        gene = Entrez.get_gene(id)
        gene.nil? ? nil : gene.summary
      end
    else
      gene = Entrez.get_gene(to("Entrez Gene ID"))
      gene.nil? ? nil : gene.summary
    end
  end

  # Runs the Translation workflow's :tsv_translate job for the receiver and
  # wraps the raw result with Gene.setup under +new_format+.
  #
  # Array receivers get the results picked in receiver order (values_at);
  # single receivers get the entry stored under themselves.
  # NOTE(review): each entry is presumably a list of candidate identifiers,
  # since #to takes `.first` of it -- confirm against the workflow.
  def to!(new_format)
    if Array === self
      translations = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec
      Gene.setup(translations.values_at(*self), new_format, organism)
    else
      translations = Translation.job(:tsv_translate, "", :organism => organism, :genes => [self], :format => new_format).exec
      Gene.setup(translations[self], new_format, organism)
    end
  end

  # Translates the receiver to +new_format+, keeping only the first entry of
  # each translation returned by #to!. Returns self untouched when it is
  # already in that format; missing translations stay nil.
  def to(new_format)
    return self if format == new_format
    if Array === self
      # collect! edits the array built by Gene.setup in place, so the value
      # returned is still that set-up object.
      to!(new_format).collect! { |v| v.nil? ? nil : v.first }
    else
      v = to!(new_format)
      v.nil? ? nil : v.first
    end
  end

  # Looks the receiver up in the Organism.gene_pfam index (loaded as a
  # persisted :flat TSV). Array receivers get the flattened values for all
  # their elements; single receivers get the index entry for themselves.
  # The receiver is used as the key as-is, without translation.
  def self2pfam
    index = Organism.gene_pfam(organism).tsv :type => :flat, :persist => true
    if Array === self
      index.values_at(*self).flatten
    else
      index[self]
    end
  end

  # Returns the "Chromosome Name" stored in Organism.gene_positions for the
  # receiver's "Ensembl Gene ID" translation (one value per element for
  # array receivers).
  def chromosome
    chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
    if Array === self
      to("Ensembl Gene ID").collect do |gene|
        chr[gene]
      end
    else
      chr[to("Ensembl Gene ID")]
    end
  end

  # Returns a Range built from the "Gene Start" and "Gene End" columns of
  # Organism.gene_positions, keyed by the receiver's "Ensembl Gene ID"
  # translation. Genes absent from the table yield nil.
  def range
    pos = Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i
    if Array === self
      to("Ensembl Gene ID").collect do |gene|
        next unless pos.include? gene
        Range.new(*pos[gene])
      end
    else
      # Translate once and reuse the result for both the membership test and
      # the lookup instead of running the translation job twice.
      ensembl = to("Ensembl Gene ID")
      return nil unless pos.include? ensembl
      Range.new(*pos[ensembl])
    end
  end

end
81
+
82
# Entity mixin for transcript/probe identifiers (single value or array).
#
# Translations are produced by the Translation workflow's
# :tsv_probe_translate job and are set up as Gene entities.
module Transcript
  extend Entity

  # #to and #to! read +format+ and +organism+ on the receiver, so both must
  # be declared as annotations (as Gene does). Without them a bare `format`
  # resolves to Kernel#format and `organism` is not defined at all.
  self.annotation :format
  self.annotation :organism

  # Runs the :tsv_probe_translate job for the receiver and wraps the raw
  # result with Gene.setup under +new_format+.
  #
  # Array receivers get the results picked in receiver order (values_at);
  # single receivers get the entry stored under themselves.
  def to!(new_format)
    if Array === self
      translations = Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec
      Gene.setup(translations.values_at(*self), new_format, organism)
    else
      translations = Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => [self], :format => new_format).exec
      Gene.setup(translations[self], new_format, organism)
    end
  end

  # Translates the receiver to +new_format+, keeping only the first entry of
  # each translation returned by #to!. Returns self untouched when it is
  # already in that format; missing translations stay nil.
  def to(new_format)
    return self if format == new_format
    if Array === self
      # collect! (rather than collect) edits the array built by Gene.setup in
      # place, matching Gene#to; collect would hand back a fresh plain Array.
      to!(new_format).collect! { |v| v.nil? ? nil : v.first }
    else
      v = to!(new_format)
      v.nil? ? nil : v.first
    end
  end
end
@@ -0,0 +1,186 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/mutation/mutation_assessor'
5
+ require 'rbbt/entity/protein'
6
+
7
+ Workflow.require_workflow "Sequence"
8
+
9
# Entity mixin for protein-level mutations written as "<protein id>:<change>"
# (a single string or an array of them), annotated with +organism+.
module MutatedIsoform
  extend Entity
  self.annotation :organism

  self.format = "Mutated Isoform"

  # Returns the part before the first ":" of each mutation, set up as a
  # Protein entity in the "Ensembl Protein ID" format.
  def protein
    if Array === self
      Protein.setup(self.collect { |mutation| mutation.split(":").first }, "Ensembl Protein ID", organism)
    else
      Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
    end
  end

  # Builds the Ensembl TranslationImage (SVG export) URL for the mutation's
  # protein; array receivers return one URL per element.
  #
  # The host is www.ensembl.org for organism "Hsa"; otherwise it is the text
  # after the last "/" of +organism+ followed by ".archive.ensembl.org".
  # The "Homo_sapiens" path segment is fixed.
  def ensembl_protein_image_url
    if Array === self
      self.collect { |e| e.ensembl_protein_image_url }
    else
      ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
      "http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
    end
  end

  # "*" in whatever form String#[] with an Integer index returns on the
  # running Ruby (a character code on 1.8, a one-character String on 1.9+),
  # so it can be compared directly with change[0] / change[-1].
  ASTERISK = "*"[0]

  # Classifies a single mutation from its change string (the text after ":").
  # The checks run in order and the first match wins:
  # "UTR", "NOSTOP" (starts with "*"), "NONSENSE" (ends with "*"), "INDEL",
  # "FRAMESHIFT", "SYNONYMOUS" (first and last characters equal), otherwise
  # "MISS-SENSE".
  def single_type
    change = self.split(":")[1]

    case
    when change =~ /UTR/
      "UTR"
    when (change[0] == ASTERISK and not change[0] == change[-1])
      "NOSTOP"
    when (change[-1] == ASTERISK and not change[0] == change[-1])
      "NONSENSE"
    when change =~ /Indel/
      "INDEL"
    when change =~ /FrameShift/
      "FRAMESHIFT"
    when change[0] == change[-1]
      "SYNONYMOUS"
    else
      "MISS-SENSE"
    end
  end

  # Returns #single_type for every element of an array receiver.
  def ary_type
    self.collect { |mutation| mutation.single_type }
  end

  # Returns the mutation type: an array of types for array receivers, a
  # single type string otherwise.
  def type
    Array === self ? ary_type : single_type
  end

  # Returns the elements of an array receiver whose type is one of +types+,
  # set up again as MutatedIsoform with the same organism.
  def filter(*types)
    list = self.zip(type).select do |mutation, mutation_type|
      types.include? mutation_type
    end.collect { |mutation, mutation_type| mutation }

    MutatedIsoform.setup(list, organism)
  end

  # Runs MutationAssessor.chunked_predict for the receiver and returns its
  # result re-keyed by "Mutated Isoform" with the "Func. Impact" field.
  #
  # Array receivers only submit their "MISS-SENSE" mutations; mutations whose
  # protein has no "UniProt/SwissProt ID" translation are skipped. When there
  # is nothing to report an empty TSV with the same key/fields is returned.
  def self2mutation_assessor_prediction
    if Array === self
      filtered = filter "MISS-SENSE"
      # Maps each submitted [uniprot, change] pair back to its mutation.
      correspondance = {}
      mutations = filtered.zip(filtered.protein.to("UniProt/SwissProt ID")).collect do |mutation, uniprot|
        next if uniprot.nil?
        uniprot_change = [uniprot, mutation.split(":")[1]]
        correspondance[uniprot_change] = mutation
        uniprot_change
      end.compact

      tsv = MutationAssessor.chunked_predict(mutations)
      return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if tsv.empty?
      tsv.add_field "Mutated Isoform" do |key, values|
        # Result keys are split on " " to recover the submitted pair.
        correspondance[key.split(" ")]
      end
      tsv.reorder "Mutated Isoform", ["Func. Impact"]
    else
      # The change comes from the receiver itself; the previous code read an
      # undefined local here.
      change = self.split(":")[1]
      uniprot = protein.to "UniProt/SwissProt ID"
      return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if uniprot.nil?

      # Submit a list holding one [uniprot, change] pair, the same shape the
      # array branch passes.
      tsv = MutationAssessor.chunked_predict([[uniprot, change]])
      return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if tsv.empty?

      # Capture the receiver in a local so the block yields this mutation
      # however add_field evaluates the block.
      isoform = self
      tsv.add_field "Mutated Isoform" do |key, values|
        isoform
      end
      tsv.reorder "Mutated Isoform", ["Func. Impact"]
    end
  end

  # Returns the "NONSENSE" mutations whose first number in the change string
  # is below 70% of the protein sequence length.
  def early_nonsense
    early_truncations "NONSENSE"
  end

  # Returns the "FRAMESHIFT" mutations whose first number in the change
  # string is below 70% of the protein sequence length.
  def early_frameshifts
    early_truncations "FRAMESHIFT"
  end

  # Returns the mutations that pass the Mutation Assessor cutoff, plus the
  # early nonsense and early frameshift mutations, set up as MutatedIsoform.
  #
  # Options (defaults filled in through Misc.add_defaults):
  # :mutation_assesor_cutoff:: lowest accepted impact level among low,
  #                            medium and high (default :medium)
  # :non_sense::               add #early_nonsense (default true)
  # :frameshift::              add #early_frameshifts (default true)
  def damaged(options = {})
    options = Misc.add_defaults options, :mutation_assesor_cutoff => :medium, :non_sense => true, :frameshift => true

    levels = [:low, :medium, :high].collect { |v| v.to_s }
    cutoff = levels.index options[:mutation_assesor_cutoff].to_s

    predicted = self2mutation_assessor_prediction.select { |k, v|
      if v.nil?
        false
      else
        # Values that are not one of the known levels have no index and are
        # rejected.
        value = levels.index(v[0].to_s)
        value and value >= cutoff
      end
    }.collect { |k, v| k }

    predicted += early_nonsense if options[:non_sense]
    predicted += early_frameshifts if options[:frameshift]

    MutatedIsoform.setup(predicted, organism)
  end

  # Shared implementation of #early_nonsense and #early_frameshifts: keeps
  # the mutations of type +mutation_type+ whose first number in the change
  # string is below 70% of the length of the protein's sequence in
  # Organism.protein_sequence. Mutations with an unknown protein or without
  # any digits in the change are dropped.
  def early_truncations(mutation_type)
    protein_sequences = Organism.protein_sequence(organism).tsv :persist => true, :type => :single
    filter(mutation_type).select do |isoform_mutation|
      protein_id, change = isoform_mutation.split ":"
      next false unless protein_sequences.include? protein_id
      position = change.to_s[/\d+/]
      next false if position.nil?
      position.to_f < protein_sequences[protein_id].length.to_f * 0.7
    end
  end
  private :early_truncations
end
147
+
148
# Entity mixin for genomic mutations (a single value or an array of them),
# annotated with a +name+ for the set and its +organism+.
module GenomicMutation
  extend Entity
  self.annotation :name
  self.annotation :organism

  self.format = "Genomic Mutation"

  # Runs the Sequence workflow's :genes_at_genomic_positions job, named after
  # the entity, over the receiver's positions and returns its result.
  def self2genes
    positions = Array === self ? self : [self]
    Sequence.job(:genes_at_genomic_positions, name, :organism => organism, :positions => positions).run
  end

  # Returns the distinct values of #self2genes set up as Gene entities in the
  # "Ensembl Gene ID" format.
  def genes
    ensembl_ids = self2genes.values.flatten.uniq
    Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
  end

  # Runs the Sequence workflow's :mutated_isoforms_for_genomic_mutations job,
  # named after the entity, over the receiver's mutations and returns its
  # result.
  def self2mutated_isoforms
    mutations = Array === self ? self : [self]
    Sequence.job(:mutated_isoforms_for_genomic_mutations, name, :organism => organism, :mutations => mutations).run
  end

  # Returns all values of #self2mutated_isoforms, flattened, set up as
  # MutatedIsoform entities.
  def mutated_isoforms
    MutatedIsoform.setup(self2mutated_isoforms.values.flatten, organism)
  end

  # Returns the mutations that have at least one isoform among
  # mutated_isoforms.damaged(options), set up as a GenomicMutation named
  # "<name>.damaging". +options+ is passed straight to
  # MutatedIsoform#damaged.
  def damaging_mutations(options = {})
    damaged_isoforms = mutated_isoforms.damaged(options)
    hits = self2mutated_isoforms.select do |mutation, values|
      isoforms = values["Mutated Isoform"]
      (damaged_isoforms & isoforms).any?
    end
    # Keys are duplicated before being set up as entities.
    selected = hits.collect { |mutation, isoforms| mutation.dup }
    GenomicMutation.setup(selected, name + '.damaging', organism)
  end

  # Returns the mutations that #self2genes associates with any of +genes+
  # (translated to "Ensembl Gene ID", nils dropped), set up as a
  # GenomicMutation named "<name>.mutations_at_genes".
  def mutations_at_genes(genes)
    ensembl_ids = genes.to("Ensembl Gene ID").compact
    matching = self.self2genes.select("Ensembl Gene ID" => ensembl_ids)
    subset = matching.keys.collect { |key| key.dup }
    GenomicMutation.setup(subset, name + '.mutations_at_genes', organism)
  end
end
@@ -0,0 +1,26 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/go'
4
+ require 'rbbt/sources/organism'
5
+ require 'rbbt/entity/gene'
6
+
7
# Entity mixin for Gene Ontology identifiers, annotated with +organism+ and
# registered for the "GO Term" and "GO ID" formats.
module GOTerm
  extend Entity
  self.annotation :organism

  self.format = ["GO Term", "GO ID"]

  # Returns GO.id2name for the receiver, or for each element when the
  # receiver is an array.
  def name
    return GO.id2name(self) unless Array === self
    self.collect { |id| GO.id2name(id) }
  end

  # Returns the first field of the receiver's entry in the
  # "GO ID" => "Ensembl Gene ID" table built from Organism.gene_go,
  # set up as Gene entities in the "Ensembl Gene ID" format.
  def genes
    go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
    go2genes.unnamed = true
    ensembl_ids = go2genes[self].first
    Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
  end
end
@@ -0,0 +1,30 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/statistics/hypergeometric'
5
+ require 'rbbt/network/paths'
6
+
7
+ Workflow.require_workflow "Translation"
8
+
9
# Entity mixin for protein identifiers (a single value or an array of them),
# annotated with the identifier +format+ and the +organism+.
module Protein
  extend Entity
  include Entity::Adjacent
  include Entity::Enriched

  self.annotation :format
  self.annotation :organism

  # Translates the receiver to "Ensembl Protein ID" and sets the result up
  # as a Gene entity tagged with that same format.
  def gene
    ensembl_proteins = to("Ensembl Protein ID")
    Gene.setup(ensembl_proteins, "Ensembl Protein ID", organism)
  end

  # Translates the receiver to +new_format+ through the Translation
  # workflow's :translate_protein job and sets the result up as a Protein.
  # Returns self untouched when it is already in that format. Single
  # receivers are submitted as a one-element list and get the first result.
  def to(new_format)
    return self if format == new_format

    batch = Array === self
    proteins = batch ? self : [self]
    translated = Translation.job(:translate_protein, "", :organism => organism, :proteins => proteins, :format => new_format).exec
    Protein.setup(batch ? translated : translated.first, new_format, organism)
  end
end
30
+
@@ -0,0 +1,33 @@
1
+ require 'rbbt/annotations'
2
+
3
# Hook module for entity types. A module that does `extend Entity` is turned
# into an Annotation, recorded in Entity.formats under its own name, and
# given a `template` accessor plus a `format=` writer that records it under
# further format names.
module Entity
  class << self
    # Hash from format name (or module name) to the entity module handling it.
    attr_accessor :formats
  end
  self.formats = {}

  # Called by Ruby when a module extends Entity; +base+ is that module.
  def self.extended(base)
    base.extend Annotation unless Annotation === base
    Entity.formats[base.to_s] = base

    base.module_eval do
      class << self
        attr_accessor :template

        # Keep the `extended` hook the module had so far so the replacement
        # defined below can still call it.
        alias prev_entity_extended extended

        # Runs when an object is extended with the entity module: calls the
        # previous hook first, then also extends the object with
        # AnnotatedArray.
        def extended(data)
          prev_entity_extended(data)
          data.extend AnnotatedArray
        end

        # Records this module in Entity.formats under one format name or
        # under each name of an array of them.
        def format=(formats)
          names = Array === formats ? formats : [formats]
          names.each { |format| Entity.formats[format] = self }
        end
      end
    end
  end
end
33
+
@@ -0,0 +1,14 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+
3
+ require 'test/unit'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'test/unit'
6
+ require 'rbbt/entity/gene'
7
+
8
# Test case for the Gene entity.
class TestGene < Test::Unit::TestCase
  # The gene name "CDK5" in organism "Hsa" is expected to translate to the
  # Entrez Gene ID "1020".
  def test_to
    cdk5 = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
    assert_equal "1020", cdk5.to("Entrez Gene ID")
  end
end
13
+
14
+
@@ -0,0 +1,4 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
3
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
4
+
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-entities
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-10-03 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rbbt-util
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rbbt-sources
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: rbbt-dm
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
63
+ description: Entities for handling tsv files, caches, etc
64
+ email: miguel.vazquez@cnio.es
65
+ executables: []
66
+
67
+ extensions: []
68
+
69
+ extra_rdoc_files:
70
+ - LICENSE
71
+ files:
72
+ - LICENSE
73
+ - lib/rbbt/entity.rb
74
+ - lib/rbbt/entity/gene.rb
75
+ - lib/rbbt/entity/genotype.rb
76
+ - lib/rbbt/entity/misc.rb
77
+ - lib/rbbt/entity/protein.rb
78
+ - test/test_helper.rb
79
+ - test/rbbt/entity/test_gene.rb
80
+ has_rdoc: true
81
+ homepage: http://github.com/mikisvaz/rbbt-util
82
+ licenses: []
83
+
84
+ post_install_message:
85
+ rdoc_options: []
86
+
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ hash: 3
95
+ segments:
96
+ - 0
97
+ version: "0"
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ hash: 3
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.6.2
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Entities for the Ruby Bioinformatics Toolkit (rbbt)
114
+ test_files:
115
+ - test/test_helper.rb
116
+ - test/rbbt/entity/test_gene.rb