bio-synreport 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over TLS; plain-HTTP sources can be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# BioRuby is needed at runtime: lib/bio/utils/bio-synreport.rb does
# `require 'bio'`, so it must not be confined to the development group.
gem "bio", ">= 1.4.2"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
end
@@ -0,0 +1,32 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (3.2.9)
5
+ i18n (~> 0.6)
6
+ multi_json (~> 1.0)
7
+ bio (1.4.3)
8
+ git (1.2.5)
9
+ i18n (0.6.1)
10
+ jeweler (1.6.4)
11
+ bundler (~> 1.0)
12
+ git (>= 1.2.5)
13
+ rake
14
+ multi_json (1.3.7)
15
+ rake (10.0.2)
16
+ rcov (1.0.0)
17
+ shoulda (3.3.2)
18
+ shoulda-context (~> 1.0.1)
19
+ shoulda-matchers (~> 1.4.1)
20
+ shoulda-context (1.0.1)
21
+ shoulda-matchers (1.4.1)
22
+ activesupport (>= 3.0.0)
23
+
24
+ PLATFORMS
25
+ ruby
26
+
27
+ DEPENDENCIES
28
+ bio (>= 1.4.2)
29
+ bundler (~> 1.0.0)
30
+ jeweler (~> 1.6.4)
31
+ rcov
32
+ shoulda
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Dan MacLean
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = bio-synreport
2
+
3
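+ Reports whether a nucleotide change results in a synonymous or non-synonymous mutation. Given a GFF file and a genomic sequence file, bio-synreport constructs the CDS and, for a position and an alternative base, reports whether the change is in a coding region and which kind of substitution it causes.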
4
+
5
+ == Contributing to bio-synreport
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 Dan MacLean. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,53 @@
1
# encoding: utf-8

# Rakefile for bio-synreport: wires up the jeweler packaging tasks, the test
# task (default), an rcov coverage task and the rdoc task.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Missing gems: report and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-synreport"
  gem.homepage = "http://github.com/danmaclean/bioruby-synreport"
  gem.license = "MIT"
  gem.summary = %Q{Reports whether a nucleotide change results in synonymous or non-synonymous mutations}
  gem.description = %Q{Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation.}
  gem.email = "maclean.daniel@gmail.com"
  gem.authors = ["Dan MacLean"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

# 'rake/rdoctask' is obsolete in the Rake 10 series this bundle locks to; the
# replacement lives in RDoc itself. Fall back to the legacy task only when the
# installed RDoc is too old to ship 'rdoc/task'.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-synreport #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,63 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-
#
# NOTE: `bio` is declared here as a runtime dependency because
# lib/bio/utils/bio-synreport.rb requires it when the gem is loaded. Keep the
# Gemfile in agreement (bio outside the :development group) so that
# 'rake gemspec' regenerates the same dependency list.

Gem::Specification.new do |s|
  s.name = "bio-synreport"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Dan MacLean"]
  s.date = "2012-11-21"
  s.description = "Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation."
  s.email = "maclean.daniel@gmail.com"
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.rdoc"
  ]
  s.files = [
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "bio-synreport.gemspec",
    "examples/test.rb",
    "lib/bio-synreport.rb",
    "lib/bio/utils/bio-synreport.rb",
    "test/helper.rb",
    "test/test_bio-synreport.rb"
  ]
  s.homepage = "http://github.com/danmaclean/bioruby-synreport"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.11"
  s.summary = "Reports whether a nucleotide change results in synonymous or non-synonymous mutations"

  s.specification_version = 3 if s.respond_to? :specification_version

  # RubyGems older than 1.2.0 (or without specification_version) cannot
  # distinguish development dependencies, so everything is declared as a
  # plain dependency there.
  separate_dev_deps = s.respond_to?(:specification_version) &&
                      Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')

  # Needed whenever the library is loaded.
  s.add_dependency(%q<bio>, [">= 1.4.2"])

  # Needed only to run rake, the tests and the packaging tasks.
  [
    [%q<shoulda>, ">= 0"],
    [%q<bundler>, "~> 1.0.0"],
    [%q<jeweler>, "~> 1.6.4"],
    [%q<rcov>, ">= 0"]
  ].each do |gem_name, requirement|
    if separate_dev_deps
      s.add_development_dependency(gem_name, [requirement])
    else
      s.add_dependency(gem_name, [requirement])
    end
  end
end
63
+
@@ -0,0 +1,19 @@
1
# Example driver for Bio::Util::SynReport.
#
# Usage: ruby examples/test.rb <annotation.gff> <genome.fasta>
#
# Loads the annotation and sequence files, then pretty-prints the mutation
# report for a few hard-coded positions.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))

require 'bio-synreport'
require 'pp'

gff_path, fasta_path = ARGV
# Fail with a readable message instead of an error from File.open(nil).
abort "Usage: ruby #{$0} <annotation.gff> <genome.fasta>" unless gff_path && fasta_path

#this is how you use it... no really!
db = Bio::Util::SynReport.new(:gff => gff_path, :fasta => fasta_path, :verbose => true)

# [sequence name, position, alternative base]
queries = [
  ['Chr2', 7634495, 't'],
  ['Chr3', 123456, 't'],
  ['Chr2', 7626518, 't']
]

queries.each do |chr, pos, alt|
  pp db.mutation_info(chr, pos, alt)
end
@@ -0,0 +1,12 @@
1
+ # Please require your code below, respecting the bioruby directory tree.
2
+ # For instance, perhaps the only uncommented line in this file might
3
+ # be something like this:
4
+ #
5
+ # require 'bio/sequence/awesome_sequence_plugin_thingy'
6
+ #
7
+ # and then create the ruby file 'lib/bio/sequence/awesome_sequence_plugin_thingy.rb'
8
+ # and put your plugin's code there. It is bad practice to write other code
9
+ # directly into this file, because doing so causes confusion if this biogem
10
+ # were ever to get merged into the main bioruby tree.
11
+
12
+ require 'bio/utils/bio-synreport'
@@ -0,0 +1,143 @@
1
+ require 'rubygems'
2
+ require 'pp'
3
+ require 'bio'
4
+
5
+ module Bio
6
+ class Util
7
+
8
# Model of the coding sequence of one mRNA.
#
# Holds the name of the reference sequence, the mRNA's GFF id, its strand,
# the CDS segments as [start, stop] pairs and the nucleotide sequence of each
# segment. Segments and sequences are expected in transcript order, i.e. the
# order in which joining +sequences+ yields the coding sequence.
class MrnaModel
  attr_accessor :seqname, :gff_id, :strand, :cds, :sequences

  # chr::     name of the reference sequence the mRNA lies on
  # id::      GFF id of the mRNA
  # strand::  "+" or "-"
  # cds_arr:: array of [start, stop] pairs, one per CDS segment
  # seq_arr:: array of segment sequences, parallel to +cds_arr+
  def initialize(chr, id, strand, cds_arr, seq_arr)
    @seqname   = chr
    @gff_id    = id
    @strand    = strand
    @cds       = cds_arr
    @sequences = seq_arr
  end

  # True when +point+ (anything responding to to_i) lies inside one of the
  # CDS segments, bounds inclusive, and +seq+ names this model's sequence.
  def includes?(seq, point)
    return false unless @seqname == seq

    pos = point.to_i
    @cds.any? { |start, stop| pos >= start && pos <= stop }
  end

  # The whole coding sequence: all segment sequences joined in order.
  def seq
    @sequences.join
  end

  # Describes the effect of replacing the base at +point+ with +alt+.
  #
  # Returns a Hash with the keys :id, :chr, :strand, :position,
  # :original_codon, :original_residue, :mutant_codon, :mutant_residue,
  # :position_in_codon (1-based) and :substitution_type ("SYN" or
  # "NON_SYN"). Returns false when +point+ is in none of the CDS segments,
  # when the strand is neither "+" nor "-", or when the position falls into
  # an incomplete trailing codon.
  #
  # +chr+ is accepted for interface compatibility and is not consulted.
  #
  # NOTE(review): +alt+ is written into the codon as given. For "-" strand
  # models the codon is read from the stored (transcript-oriented) sequence,
  # so callers presumably have to pass +alt+ in that orientation -- confirm.
  def substitution_info(chr, point, alt)
    pos = point.to_i
    bases_before = 0 # coding bases in the segments preceding the current one

    @cds.each do |start, stop|
      if pos >= start && pos <= stop
        # offset = how far into the cds the SNP is (0-based)
        offset = case @strand
                 when "+" then (pos - start) + bases_before
                 when "-" then (stop - pos) + bases_before
                 end
        return false if offset.nil?

        codon_number      = offset / 3
        position_in_codon = offset % 3

        triplet = seq[codon_number * 3, 3]
        return false if triplet.nil? || triplet.length < 3

        codon = Bio::Sequence::NA.new(triplet)
        new_codon = codon.dup
        new_codon[position_in_codon] = alt.downcase

        original_residue = Bio::Sequence::NA.new(codon).translate.codes.first
        mutant_residue   = Bio::Sequence::NA.new(new_codon).translate.codes.first
        sub_type = original_residue == mutant_residue ? "SYN" : "NON_SYN"

        return {:id => @gff_id,
                :chr => @seqname,
                :strand => @strand,
                :position => point,
                :original_codon => codon,
                :original_residue => original_residue || 'stop',
                :mutant_codon => new_codon,
                :mutant_residue => mutant_residue || 'stop',
                :position_in_codon => position_in_codon + 1,
                :substitution_type => sub_type
        }
      end

      # A segment spans start..stop inclusive on either strand, so it
      # contributes stop - start + 1 bases to the running offset.
      bases_before += stop - start + 1
    end

    false
  end
end
66
+
67
+
68
# Answers, for a position on a reference sequence, whether it lies in a
# coding region and what a base substitution there does to the codon.
#
# Built from a GFF3 annotation file and a FASTA file of the annotated
# sequences.
class SynReport
  #attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash

  # opts is a Hash with the keys
  # :gff::     path of the GFF3 annotation file
  # :fasta::   path of the FASTA file with the reference sequences
  # :verbose:: when truthy, progress messages are written to $stderr
  def initialize(opts)
    load_annotation(opts[:gff])
    $stderr.puts "Loaded GFF..." if opts[:verbose]

    load_sequences(opts[:fasta])
    $stderr.puts "Loaded Seq..." if opts[:verbose]

    $stderr.puts "Building models..." if opts[:verbose]
    build_models
    $stderr.puts "Models built..." if opts[:verbose]
  end#init end

  # True when +point+ on sequence +chr+ lies inside the CDS of any model.
  def is_in_cds?(chr, point)
    @models[chr].any? { |model| model.includes?(chr, point) }
  end

  #returns mutation info if point in CDS, if not in CDS returns false
  #
  # The info is whatever the first model containing +pos+ returns from
  # substitution_info(chr, pos, alt).
  def mutation_info(chr, pos, alt)
    model = @models[chr].find { |m| m.includes?(chr, pos) }
    model ? model.substitution_info(chr, pos, alt) : false
  end

  private

  # Reads the GFF file line by line. Genes are collected as
  # [seqname, id] pairs; CDS and mRNA records are indexed by sequence name
  # and then by each of their Parent ids.
  def load_annotation(gff_path)
    @gene_array = []
    @cdshash  = Hash.new { |h, k| h[k] = Hash.new { |a, b| a[b] = [] } }
    @mRNAhash = Hash.new { |h, k| h[k] = Hash.new { |a, b| a[b] = [] } }

    # File.foreach closes the file when iteration ends, unlike a bare
    # File.open(...).each, which leaves the handle open.
    File.foreach(gff_path) do |gffline|
      record = Bio::GFF::GFF3::Record.new(gffline)
      case record.feature_type
      when 'gene'
        @gene_array << [record.seqname, record.id]
      when 'CDS', 'mRNA'
        index = record.feature_type == 'CDS' ? @cdshash : @mRNAhash
        record.get_attributes('Parent').each do |parent|
          index[record.seqname][parent] << record
        end
      end
    end
  end

  # Loads every FASTA entry into @seqhash, keyed by its entry id.
  def load_sequences(fasta_path)
    @seqhash = {}
    Bio::FastaFormat.open(fasta_path).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
  end

  # Creates one MrnaModel per mRNA of every gene whose sequence was loaded
  # and files it in @models under the mRNA's sequence name.
  def build_models
    @models = Hash.new { |h, k| h[k] = [] }

    @gene_array.each do |seqname, gene_id|
      next if @seqhash[seqname].nil? # no sequence for this gene: nothing to model

      @mRNAhash[seqname][gene_id].each do |mrna|
        segments = @cdshash[seqname][mrna.id].map { |c| [c.start, c.end] }.sort
        # "-" strand transcripts run from the highest coordinate down
        segments.reverse! if mrna.strand == '-'

        seqs = segments.map { |first, last| segment_sequence(mrna, first, last) }.compact
        @models[mrna.seqname] << Bio::Util::MrnaModel.new(mrna.seqname, mrna.id, mrna.strand, segments, seqs)
      end
    end
  end

  # Sequence of one CDS segment in the orientation of the mRNA's strand, or
  # nil when the strand is neither "+" nor "-".
  # NOTE(review): relies on NA#complement returning the reverse complement,
  # as the BioRuby documentation describes -- confirm for the bio version in use.
  def segment_sequence(mrna, first, last)
    case mrna.strand
    when '+'
      Bio::Sequence::NA.new(@seqhash[mrna.seqname].splicing("#{first}..#{last}"))
    when '-'
      Bio::Sequence::NA.new(@seqhash[mrna.seqname].splicing("#{first}..#{last}")).complement
    end
  end
end#class end
142
+ end#class util end
143
+ end# module end
@@ -0,0 +1,18 @@
1
# Shared test bootstrap: activates the bundled gems, loads the test
# frameworks and puts the library and the test directory on the load path.

require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # A gem from the Gemfile is missing: say how to fix it and stop.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# lib is pushed first, so the test directory ends up ahead of it.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each do |dir|
  $LOAD_PATH.unshift(dir)
end
require 'bio-synreport'

# Reopened as the place for helpers shared by all test cases.
class Test::Unit::TestCase
end
@@ -0,0 +1,7 @@
1
require 'helper'

# Unit tests for Bio::Util::MrnaModel on a small in-memory model, so no GFF
# or FASTA fixture files are needed.
class TestBioSynreport < Test::Unit::TestCase
  context "a single-exon plus-strand mRNA model" do
    setup do
      # CDS occupies positions 11..16 of Chr1 and reads atg aaa
      @model = Bio::Util::MrnaModel.new('Chr1', 'gene1.1', '+',
                                        [[11, 16]],
                                        [Bio::Sequence::NA.new('atgaaa')])
    end

    should "include positions within the CDS bounds" do
      assert @model.includes?('Chr1', 11)
      assert @model.includes?('Chr1', 16)
    end

    should "exclude positions outside the CDS or on another sequence" do
      assert !@model.includes?('Chr1', 10)
      assert !@model.includes?('Chr1', 17)
      assert !@model.includes?('Chr2', 12)
    end

    should "join the segment sequences into the coding sequence" do
      assert_equal 'atgaaa', @model.seq
    end

    should "report a synonymous change in the third codon position" do
      info = @model.substitution_info('Chr1', 16, 'g')
      assert_equal 'aaa', info[:original_codon].to_s
      assert_equal 'aag', info[:mutant_codon].to_s
      assert_equal 3, info[:position_in_codon]
      assert_equal 'SYN', info[:substitution_type]
    end

    should "report a non-synonymous change in the first codon position" do
      info = @model.substitution_info('Chr1', 14, 'c')
      assert_equal 'aaa', info[:original_codon].to_s
      assert_equal 'caa', info[:mutant_codon].to_s
      assert_equal 1, info[:position_in_codon]
      assert_equal 'NON_SYN', info[:substitution_type]
    end
  end
end
metadata ADDED
@@ -0,0 +1,152 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-synreport
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Dan MacLean
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-11-21 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ type: :development
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ version_requirements: *id001
33
+ name: shoulda
34
+ - !ruby/object:Gem::Dependency
35
+ type: :development
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 23
43
+ segments:
44
+ - 1
45
+ - 0
46
+ - 0
47
+ version: 1.0.0
48
+ version_requirements: *id002
49
+ name: bundler
50
+ - !ruby/object:Gem::Dependency
51
+ type: :development
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ hash: 7
59
+ segments:
60
+ - 1
61
+ - 6
62
+ - 4
63
+ version: 1.6.4
64
+ version_requirements: *id003
65
+ name: jeweler
66
+ - !ruby/object:Gem::Dependency
67
+ type: :development
68
+ prerelease: false
69
+ requirement: &id004 !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ version_requirements: *id004
79
+ name: rcov
80
+ - !ruby/object:Gem::Dependency
81
+ type: :development
82
+ prerelease: false
83
+ requirement: &id005 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ hash: 3
89
+ segments:
90
+ - 1
91
+ - 4
92
+ - 2
93
+ version: 1.4.2
94
+ version_requirements: *id005
95
+ name: bio
96
+ description: Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation.
97
+ email: maclean.daniel@gmail.com
98
+ executables: []
99
+
100
+ extensions: []
101
+
102
+ extra_rdoc_files:
103
+ - LICENSE.txt
104
+ - README.rdoc
105
+ files:
106
+ - Gemfile
107
+ - Gemfile.lock
108
+ - LICENSE.txt
109
+ - README.rdoc
110
+ - Rakefile
111
+ - VERSION
112
+ - bio-synreport.gemspec
113
+ - examples/test.rb
114
+ - lib/bio-synreport.rb
115
+ - lib/bio/utils/bio-synreport.rb
116
+ - test/helper.rb
117
+ - test/test_bio-synreport.rb
118
+ homepage: http://github.com/danmaclean/bioruby-synreport
119
+ licenses:
120
+ - MIT
121
+ post_install_message:
122
+ rdoc_options: []
123
+
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ hash: 3
132
+ segments:
133
+ - 0
134
+ version: "0"
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ hash: 3
141
+ segments:
142
+ - 0
143
+ version: "0"
144
+ requirements: []
145
+
146
+ rubyforge_project:
147
+ rubygems_version: 1.8.11
148
+ signing_key:
149
+ specification_version: 3
150
+ summary: Reports whether a nucleotide change results in synonymous or non-synonymous mutations
151
+ test_files: []
152
+