rbbt-entities 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,102 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/sources/entrez'
5
+
6
+ Workflow.require_workflow "Translation"
7
+
8
# Entity module for gene identifiers.
#
# Values carry two annotations: the identifier +format+ they are written in
# and the +organism+ they belong to. Every method accepts either a single
# identifier or an Array of identifiers and answers in the same shape.
module Gene
  extend Entity

  self.annotation :format
  self.annotation :organism

  # Registers Gene for every field name listed in the Hsa identifier file.
  self.format = Organism::Hsa.identifiers.all_fields

  # Translation of the gene(s) into the "Associated Gene Name" format.
  def name
    to "Associated Gene Name"
  end

  # Entrez summary of the gene(s); nil wherever Entrez.get_gene returns nil.
  def description
    if Array === self
      to("Entrez Gene ID").collect{|id| gene = Entrez.get_gene(id); gene.nil? ? nil : gene.summary}
    else
      gene = Entrez.get_gene(to("Entrez Gene ID"))
      gene.nil? ? nil : gene.summary
    end
  end

  # Raw translation into +new_format+ through the Translation workflow
  # (:tsv_translate job). The result is set up as a Gene in the new format;
  # each translated entry is whatever the job stores for the identifier
  # (see #to, which takes the first element of each entry).
  def to!(new_format)
    if Array === self
      Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
    else
      Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => [self], :format => new_format).exec[self], new_format, organism)
    end
  end

  # Translation into +new_format+ keeping only the first match per gene.
  # Returns self untouched when the gene is already in that format; entries
  # without a translation become nil.
  def to(new_format)
    return self if format == new_format
    if Array === self
      to!(new_format).collect!{|v| v.nil? ? nil : v.first}
    else
      v = to!(new_format)
      v.nil? ? nil : v.first
    end
  end

  # Pfam entries for the gene(s), looked up in the organism's gene_pfam file
  # using the identifiers as they are (no translation is performed here).
  def self2pfam
    index = Organism.gene_pfam(organism).tsv :type => :flat, :persist => true
    if Array === self
      index.values_at(*self).flatten
    else
      index[self]
    end
  end

  # "Chromosome Name" of the gene(s), looked up by Ensembl Gene ID.
  def chromosome
    chr = Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true
    if Array === self
      to("Ensembl Gene ID").collect do |gene|
        chr[gene]
      end
    else
      chr[to("Ensembl Gene ID")]
    end
  end

  # Range from "Gene Start" to "Gene End" for the gene(s), looked up by
  # Ensembl Gene ID. Genes missing from the positions file yield nil.
  def range
    pos = Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i
    # Translate once: the original scalar branch ran the translation job twice.
    ensembl = to("Ensembl Gene ID")
    if Array === self
      ensembl.collect do |gene|
        next if not pos.include? gene
        Range.new(*pos[gene])
      end
    else
      return nil if not pos.include? ensembl
      Range.new(*pos[ensembl])
    end
  end

end
81
+
82
# Entity module for transcript (probe) identifiers that can be translated
# into gene identifiers through the Translation workflow.
module Transcript
  extend Entity

  # Both annotations are read by #to and #to! below (+format+, +organism+);
  # they were never declared here, unlike in Gene and Protein.
  self.annotation :format
  self.annotation :organism

  # Raw translation into +new_format+ using the :tsv_probe_translate job.
  # The result is set up as a Gene in the new format.
  def to!(new_format)
    if Array === self
      Gene.setup(Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
    else
      Gene.setup(Translation.job(:tsv_probe_translate, "", :organism => organism, :genes => [self], :format => new_format).exec[self], new_format, organism)
    end
  end

  # Translation into +new_format+ keeping only the first match per entry.
  # Returns self when already in that format; untranslatable entries are nil.
  def to(new_format)
    return self if format == new_format
    if Array === self
      to!(new_format).collect{|v| v.nil? ? nil : v.first}
    else
      v = to!(new_format)
      v.nil? ? nil : v.first
    end
  end
end
@@ -0,0 +1,186 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/mutation/mutation_assessor'
5
+ require 'rbbt/entity/protein'
6
+
7
+ Workflow.require_workflow "Sequence"
8
+
9
# Entity module for protein-level mutations written as "<protein>:<change>"
# (the code splits every value on ":"; the protein part is treated as an
# Ensembl Protein ID). Values are annotated with their +organism+.
module MutatedIsoform
  extend Entity
  self.annotation :organism

  self.format = "Mutated Isoform"

  # Protein entity (or entities) for the part before the ":".
  def protein
    if Array === self
      Protein.setup(self.collect{|mutation| mutation.split(":").first}, "Ensembl Protein ID", organism)
    else
      Protein.setup(self.split(":").first, "Ensembl Protein ID", organism)
    end
  end

  # URL of the Ensembl translation image for the affected protein. Uses
  # www.ensembl.org for "Hsa" and an archive host named after the part of
  # the organism code following the last "/" otherwise.
  def ensembl_protein_image_url
    if Array === self
      self.collect{|e| e.ensembl_protein_image_url}
    else
      ensembl_url = if organism == "Hsa" then "www.ensembl.org" else "#{organism.sub(/.*\//,'')}.archive.ensembl.org" end
      "http://#{ensembl_url}/Homo_sapiens/Component/Transcript/Web/TranslationImage?db=core;p=#{protein};_rmd=d2a8;export=svg"
    end
  end

  # Taken with String#[] so that it compares equal to change[0] / change[-1]
  # whatever String#[] returns for a single index in the running Ruby.
  ASTERISK = "*"[0]

  # Classification of a single mutation from its change string. The branches
  # are tried in order: "UTR", "NOSTOP" (starts with "*"), "NONSENSE" (ends
  # with "*"), "INDEL", "FRAMESHIFT", "SYNONYMOUS" (first and last character
  # equal), and "MISS-SENSE" for everything else.
  def single_type
    prot, change = self.split(":")

    case
    when change =~ /UTR/
      "UTR"
    when (change[0] == ASTERISK and not change[0] == change[-1])
      "NOSTOP"
    when (change[-1] == ASTERISK and not change[0] == change[-1])
      "NONSENSE"
    when change =~ /Indel/
      "INDEL"
    when change =~ /FrameShift/
      "FRAMESHIFT"
    when change[0] == change[-1]
      "SYNONYMOUS"
    else
      "MISS-SENSE"
    end
  end

  # single_type of every mutation in the list.
  def ary_type
    self.collect{|mutation| mutation.single_type}
  end


  # Mutation type, or list of types when self is an Array.
  def type
    Array === self ? ary_type : single_type
  end

  # Subset of the list whose type is one of +types+, set up again as
  # MutatedIsoform with the same organism.
  def filter(*types)
    list = self.zip(type).select do |mutation, type|
      types.include? type
    end.collect{|mutation, type| mutation}

    MutatedIsoform.setup(list, organism)
  end

  # MutationAssessor predictions as a TSV keyed by "Mutated Isoform" with the
  # single field "Func. Impact".
  #
  # For an Array only the "MISS-SENSE" mutations whose protein translates to
  # a "UniProt/SwissProt ID" are submitted; an empty prediction yields an
  # empty TSV with the same key and field names.
  def self2mutation_assessor_prediction
    if Array === self
      filtered = filter "MISS-SENSE"
      correspondance = {}
      mutations = filtered.zip(filtered.protein.to "UniProt/SwissProt ID").collect do |mutation, uniprot|
        prot, change = mutation.split(":")
        next if uniprot.nil?
        uniprot_change = [uniprot, change]
        # Remember which isoform each submitted pair came from.
        correspondance[uniprot_change] = mutation
        uniprot_change
      end.compact

      tsv = MutationAssessor.chunked_predict(mutations)
      return TSV.setup({}, :key_field => "Mutated Isoform", :fields => ["Func. Impact"]) if tsv.empty?
      tsv.add_field "Mutated Isoform" do |key, values|
        correspondance[key.split(" ")]
      end
      tsv.reorder "Mutated Isoform", ["Func. Impact"]
    else
      # The original read `mutation.split`, but no `mutation` exists in this
      # branch (NameError): the value being split is the entity itself.
      _prot, change = self.split(":")
      uniprot = protein.to "UniProt/SwissProt ID"
      # A list holding one [uniprot, change] pair, the same shape the Array
      # branch hands to chunked_predict.
      mutations = [[uniprot, change]]

      tsv = MutationAssessor.chunked_predict(mutations)
      tsv.add_field "Mutated Isoform" do |key, values|
        self
      end
      tsv.reorder "Mutated Isoform", ["Func. Impact"]
    end
  end

  # "NONSENSE" mutations located in the first 70% of the protein sequence.
  def early_nonsense
    early_truncations "NONSENSE"
  end

  # "FRAMESHIFT" mutations located in the first 70% of the protein sequence.
  def early_frameshifts
    early_truncations "FRAMESHIFT"
  end

  # Mutations considered damaging, set up as MutatedIsoform.
  #
  # Options (defaults in parentheses):
  # * :mutation_assesor_cutoff (:medium) - lowest of :low, :medium, :high
  #   accepted as the MutationAssessor "Func. Impact"
  # * :non_sense (true)  - also include early_nonsense
  # * :frameshift (true) - also include early_frameshifts
  def damaged(options = {})
    options = Misc.add_defaults options, :mutation_assesor_cutoff => :medium, :non_sense => true, :frameshift => true

    levels = [:low, :medium, :high].collect{|v| v.to_s}
    cutoff = levels.index options[:mutation_assesor_cutoff].to_s

    predicted = self2mutation_assessor_prediction.select{|k, v|
      if v.nil?
        false
      else
        # Impact values outside of `levels` have no index and are rejected.
        value = levels.index(v[0].to_s)
        value and value >= cutoff
      end
    }.collect{|k,v| k}

    predicted += early_nonsense if options[:non_sense]
    predicted += early_frameshifts if options[:frameshift]

    MutatedIsoform.setup(predicted, organism)
  end

  private

  # Mutations of the given +type+ whose position (first run of digits in the
  # change) falls before 70% of the length of the protein sequence. Mutations
  # on proteins without a known sequence are left out.
  def early_truncations(type)
    protein_sequences = Organism.protein_sequence(organism).tsv :persist => true, :type => :single
    filter(type).select{|isoform_mutation|
      protein, mutation = isoform_mutation.split ":"
      if protein_sequences.include? protein
        mutation.match(/(\d+)/)[1].to_f < protein_sequences[protein].length.to_f * 0.7
      else
        false
      end
    }
  end
end
147
+
148
# Entity module for genomic mutations. Values are annotated with a +name+
# (used as the job name for the Sequence workflow) and an +organism+.
module GenomicMutation
  extend Entity
  self.annotation :name
  self.annotation :organism

  self.format = "Genomic Mutation"

  # Result of the :genes_at_genomic_positions job for the mutation(s).
  def self2genes
    positions = Array === self ? self : [self]
    Sequence.job(:genes_at_genomic_positions, name, :organism => organism, :positions => positions).run
  end

  # Unique genes found at the mutation positions, as "Ensembl Gene ID" entities.
  def genes
    ensembl_ids = self2genes.values.flatten.uniq
    Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
  end

  # Result of the :mutated_isoforms_for_genomic_mutations job for the mutation(s).
  def self2mutated_isoforms
    mutations = Array === self ? self : [self]
    Sequence.job(:mutated_isoforms_for_genomic_mutations, name, :organism => organism, :mutations => mutations).run
  end

  # Every mutated isoform produced by the mutation(s), as MutatedIsoform entities.
  def mutated_isoforms
    isoforms = self2mutated_isoforms.values.flatten
    MutatedIsoform.setup(isoforms, organism)
  end

  # Mutations having at least one isoform reported by MutatedIsoform#damaged
  # (called with the same +options+). The result is named "<name>.damaging".
  def damaging_mutations(options = {})
    damaged_isoforms = mutated_isoforms.damaged(options)
    hits = self2mutated_isoforms.select{|mutation, values|
      (damaged_isoforms & values["Mutated Isoform"]).any?
    }
    selected = hits.collect{|mutation, isoforms| mutation.dup}
    GenomicMutation.setup(selected, name + '.damaging', organism)
  end

  # Mutations that overlap any of +genes+ (translated to "Ensembl Gene ID").
  # The result is named "<name>.mutations_at_genes".
  def mutations_at_genes(genes)
    ensembl = genes.to("Ensembl Gene ID").compact
    matching = self.self2genes.select("Ensembl Gene ID" => ensembl)
    subset = matching.keys.collect{|e| e.dup}
    GenomicMutation.setup(subset, name + '.mutations_at_genes', organism)
  end
end
@@ -0,0 +1,26 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/go'
4
+ require 'rbbt/sources/organism'
5
+ require 'rbbt/entity/gene'
6
+
7
# Entity module for Gene Ontology terms, annotated with an +organism+ and
# registered under the "GO Term" and "GO ID" formats.
module GOTerm
  extend Entity
  self.annotation :organism

  self.format = ["GO Term", "GO ID"]

  # Name of the term, or list of names when self is an Array (GO.id2name).
  def name
    return GO.id2name(self) unless Array === self
    self.collect{|id| GO.id2name(id)}
  end

  # Genes associated with this term in the organism's gene_go file, as
  # "Ensembl Gene ID" entities. Looks self up as a single key.
  def genes
    go2genes = Organism.gene_go(organism).tsv(:key_field => "GO ID", :fields => ["Ensembl Gene ID"], :merge => true, :persist => true)
    go2genes.unnamed = true
    ensembl_ids = go2genes[self].first
    Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
  end
end
@@ -0,0 +1,30 @@
1
+ require 'rbbt/entity'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/sources/organism'
4
+ require 'rbbt/statistics/hypergeometric'
5
+ require 'rbbt/network/paths'
6
+
7
+ Workflow.require_workflow "Translation"
8
+
9
# Entity module for protein identifiers, annotated with the identifier
# +format+ and the +organism+.
module Protein
  extend Entity
  include Entity::Adjacent
  include Entity::Enriched

  self.annotation :format
  self.annotation :organism

  # Gene entity built from the "Ensembl Protein ID" translation of the protein(s).
  def gene
    ensembl_protein = to("Ensembl Protein ID")
    Gene.setup(ensembl_protein, "Ensembl Protein ID", organism)
  end

  # Translation into +new_format+ through the :translate_protein job.
  # Returns self when the protein is already in that format.
  def to(new_format)
    return self if format == new_format

    many = Array === self
    proteins = many ? self : [self]
    translated = Translation.job(:translate_protein, "", :organism => organism, :proteins => proteins, :format => new_format).exec
    # A single protein was wrapped in a list for the job; unwrap its result.
    translated = translated.first unless many
    Protein.setup(translated, new_format, organism)
  end
end
30
+
@@ -0,0 +1,33 @@
1
+ require 'rbbt/annotations'
2
+
3
# Hook module for entity definitions.
#
# A module that does `extend Entity` becomes an Annotation module, is
# recorded in Entity.formats under its own name, gains a `format=` writer to
# register additional format names, and makes every object it is later
# extended onto an AnnotatedArray as well.
module Entity
  class << self
    # Hash from a module name or format name to the entity module registered for it.
    attr_accessor :formats
  end
  self.formats = {}

  # Called by Ruby whenever a module +base+ is extended with Entity.
  def self.extended(base)
    base.extend Annotation unless Annotation === base

    Entity.formats[base.to_s] = base

    # +base+ was already wired up by an earlier `extend Entity`. Running the
    # alias below again would make prev_entity_extended refer to the override
    # itself, and the next call to base.extended would recurse without end.
    return if base.respond_to?(:prev_entity_extended, true)

    base.module_eval do
      class << self
        attr_accessor :template
        # Keep whatever `extended` hook base had before overriding it.
        alias prev_entity_extended extended
      end

      # Runs the previous hook, then adds AnnotatedArray to the object.
      def self.extended(data)
        prev_entity_extended(data)
        data.extend AnnotatedArray
      end

      # Registers this module in Entity.formats under one format name or
      # under each name of an Array of them.
      def self.format=(formats)
        formats = [formats] unless Array === formats
        formats.each do |format|
          Entity.formats[format] = self
        end
      end
    end
  end
end
33
+
@@ -0,0 +1,14 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+
3
+ require 'test/unit'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'test/unit'
6
+ require 'rbbt/entity/gene'
7
+
8
# Unit tests for the Gene entity.
class TestGene < Test::Unit::TestCase
  # Translating the human gene symbol "CDK5" must give Entrez Gene ID "1020".
  def test_to
    gene = Gene.setup("CDK5", "Associated Gene Name", "Hsa")
    assert_equal "1020", gene.to("Entrez Gene ID")
  end
end
13
+
14
+
@@ -0,0 +1,4 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
3
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
4
+
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-entities
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-10-03 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rbbt-util
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rbbt-sources
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: rbbt-dm
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
63
+ description: Entities for handling tsv files, caches, etc
64
+ email: miguel.vazquez@cnio.es
65
+ executables: []
66
+
67
+ extensions: []
68
+
69
+ extra_rdoc_files:
70
+ - LICENSE
71
+ files:
72
+ - LICENSE
73
+ - lib/rbbt/entity.rb
74
+ - lib/rbbt/entity/gene.rb
75
+ - lib/rbbt/entity/genotype.rb
76
+ - lib/rbbt/entity/misc.rb
77
+ - lib/rbbt/entity/protein.rb
78
+ - test/test_helper.rb
79
+ - test/rbbt/entity/test_gene.rb
80
+ has_rdoc: true
81
+ homepage: http://github.com/mikisvaz/rbbt-util
82
+ licenses: []
83
+
84
+ post_install_message:
85
+ rdoc_options: []
86
+
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ hash: 3
95
+ segments:
96
+ - 0
97
+ version: "0"
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ hash: 3
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.6.2
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Entities for the Ruby Bioinformatics Toolkit (rbbt)
114
+ test_files:
115
+ - test/test_helper.rb
116
+ - test/rbbt/entity/test_gene.rb