bio-emboss_six_frame_nucleotide_sequences 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over TLS; the plain-http endpoint is open to tampering in transit
# and triggers an insecure-source warning from Bundler.
source "https://rubygems.org"

# Runtime dependencies: required by anyone using the gem.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "bio", ">= 1.4.1"

# Development dependencies: everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.2"
  gem "rcov", ">= 0"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Ben J Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,21 @@
1
+ = bio-emboss_six_frame_nucleotide_sequences
2
+
3
+ A plugin for taking a nucleotide sequence, and returning the nucleotide sequence that was translated by the EMBOSS utility 'transeq' in each frame. The first 3 forward frames are easy enough to work out, but the reverse 3 can be somewhat unintuitive, and are different to bioruby.
4
+
5
+ Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(4) => Bio::Sequence::NA.new('CATCAT')
6
+
7
+ == Contributing to bio-emboss_six_frame_nucleotide_sequences
8
+
9
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
10
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
11
+ * Fork the project
12
+ * Start a feature/bugfix branch
13
+ * Commit and push until you are happy with your contribution
14
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
15
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
16
+
17
+ == Copyright
18
+
19
+ Copyright (c) 2011 Ben J Woodcroft. See LICENSE.txt for
20
+ further details.
21
+
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'bundler'

# Activate the gems declared in the Gemfile; bail out with Bundler's own exit
# status (and a hint) when any of them is missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Packaging tasks (gemspec generation, build, release) provided by jeweler.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-emboss_six_frame_nucleotide_sequences"
  gem.homepage = "http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences"
  gem.license = "MIT"
  gem.summary = %Q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
  gem.description = %Q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
  gem.email = "gmail.com after donttrustben"
  gem.authors = ["Ben J Woodcroft"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #  gem.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov` runs the same test files under rcov for coverage.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

# 'rake/rdoctask' was deprecated and later removed from Rake; the task now
# lives in RDoc itself. Prefer the modern location and fall back to the legacy
# one so that old toolchains without 'rdoc/task' keep working.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: keep the trailing newline of the VERSION file out of the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-emboss_six_frame_nucleotide_sequences #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,68 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{bio-emboss_six_frame_nucleotide_sequences}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Ben J Woodcroft"]
12
+ s.date = %q{2011-04-03}
13
+ s.description = %q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
14
+ s.email = %q{gmail.com after donttrustben}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "Gemfile",
22
+ "LICENSE.txt",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "bio-emboss_six_frame_nucleotide_sequences.gemspec",
27
+ "lib/bio-emboss_six_frame_nucleotide_sequences.rb",
28
+ "lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb",
29
+ "test/data/test.fa",
30
+ "test/data/test_transeq_6frame.fa",
31
+ "test/helper.rb",
32
+ "test/test_bio-emboss_six_frame_nucleotide_sequences.rb"
33
+ ]
34
+ s.homepage = %q{http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences}
35
+ s.licenses = ["MIT"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = %q{1.6.2}
38
+ s.summary = %q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
39
+ s.test_files = [
40
+ "test/helper.rb",
41
+ "test/test_bio-emboss_six_frame_nucleotide_sequences.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
49
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
50
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
51
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
52
+ s.add_development_dependency(%q<rcov>, [">= 0"])
53
+ else
54
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
55
+ s.add_dependency(%q<shoulda>, [">= 0"])
56
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
58
+ s.add_dependency(%q<rcov>, [">= 0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<bio>, [">= 1.4.1"])
62
+ s.add_dependency(%q<shoulda>, [">= 0"])
63
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
65
+ s.add_dependency(%q<rcov>, [">= 0"])
66
+ end
67
+ end
68
+
@@ -0,0 +1 @@
1
+ require 'bio/sequence/emboss_six_frame_nucleotide_sequences'
@@ -0,0 +1,39 @@
1
+ require 'bio'
2
+
3
module Bio
  class Sequence
    class NA
      # Return the stretch of this nucleotide sequence that the EMBOSS program
      # transeq translates in the given frame, i.e. the nucleotide sequence of
      # that translation rather than the amino acid sequence. For reverse
      # frames the result is already reverse-complemented, so calling
      # +translate+ on it reproduces the transeq protein. Note that the
      # reverse frames differ from the ones bioruby itself generates.
      #
      # +frame+ is 1, 2 or 3 for the forward frames and 4, 5 or 6 for the
      # reverse frames; -1, -2 and -3 are accepted as aliases of 4, 5 and 6.
      # Any other value raises an Exception.
      #
      # Sequences too short to contain any base of the requested frame yield
      # an empty sequence.
      #
      #  Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(1) => Bio::Sequence::NA.new('ATGATG')
      #  Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(2) => Bio::Sequence::NA.new('TGATG')
      #  Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(4) => Bio::Sequence::NA.new('CATCAT')
      #  Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(-1) => Bio::Sequence::NA.new('CATCAT')
      def nucleotide_sequence_of_transeq_translation(frame)
        unless [-1, -2, -3, 1, 2, 3, 4, 5, 6].include?(frame) # error checking
          raise Exception, "unexpected frame for translation: `#{frame.inspect}'"
        end

        # Map the negative aliases onto 4..6 (-1 => 4, -2 => 5, -3 => 6).
        # This has to happen before the forward/reverse split below, otherwise
        # a negative frame would be treated as a forward frame.
        frame = 3 - frame if frame < 0

        # The easy case: forward frames simply drop the first frame-1 bases.
        # The slice is nil when the sequence is shorter than that, hence to_s.
        if frame < 4
          return Bio::Sequence::NA.new(self[frame - 1..length - 1].to_s)
        end

        # Number of bases to drop from the 3' end before reverse-complementing
        # (stored as a negative number). Indexed by length % 3 first, then by
        # frame - 4.
        offset_table = [[0, -2, -1], [-1, 0, -2], [-2, -1, 0]]
        offset = offset_table[length % 3][frame - 4]

        # Use (start, length) slicing clamped at zero: an end index that goes
        # negative in a range would wrap around and select the whole sequence.
        kept = [length + offset, 0].max

        # Re-wrap the slice before reverse-complementing: String#[] no longer
        # returns an instance of the receiver's subclass on Ruby >= 3.0.
        Bio::Sequence::NA.new(self[0, kept]).reverse_complement
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ >9nt
2
+ ATGATGATG
3
+ >10nt
4
+ ATGATGATGA
5
+ >11nt
6
+ ATGATGATGAT
7
+
@@ -0,0 +1,36 @@
1
+ >9nt_1
2
+ MMM
3
+ >9nt_2
4
+ **X
5
+ >9nt_3
6
+ DDX
7
+ >9nt_4
8
+ HHH
9
+ >9nt_5
10
+ SSX
11
+ >9nt_6
12
+ IIX
13
+ >10nt_1
14
+ MMMX
15
+ >10nt_2
16
+ ***
17
+ >10nt_3
18
+ DDX
19
+ >10nt_4
20
+ HHH
21
+ >10nt_5
22
+ SSSX
23
+ >10nt_6
24
+ IIX
25
+ >11nt_1
26
+ MMMX
27
+ >11nt_2
28
+ ***X
29
+ >11nt_3
30
+ DDD
31
+ >11nt_4
32
+ HHH
33
+ >11nt_5
34
+ SSSX
35
+ >11nt_6
36
+ IIIX
@@ -0,0 +1,18 @@
1
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before anything else is
# loaded. On failure, print Bundler's message plus a hint and exit with
# Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# Put ../lib and then the test directory itself at the front of the load path
# (the test directory ends up first), then load the library under test.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'bio-emboss_six_frame_nucleotide_sequences'

# Reopened so shared test helpers have a place to live; currently empty.
class Test::Unit::TestCase
end
@@ -0,0 +1,114 @@
1
+ require 'helper'
2
+
3
# Checks Bio::Sequence::NA#nucleotide_sequence_of_transeq_translation against
# hand-derived expectations and against real transeq output stored in test/data.
class TestBioEmbossSixFrameNucleotideSequences < Test::Unit::TestCase
  # Short alias, captured by the test blocks below.
  na = Bio::Sequence::NA
  # Directory holding the FASTA fixtures.
  DATA_DIR = File.join(File.dirname(__FILE__), 'data')

  should "test length divisible by 3 forwards" do
    # >9nt_1
    # MMM
    # >9nt_2
    # **X
    # >9nt_3
    # DDX
    a = na.new('ATGATGATG')
    assert_equal na.new('ATGATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(1)
    assert_equal na.new('TGATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(2)
    assert_equal na.new('GATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(3)
  end

  should "test length divisible by 3 backwards" do
    # >9nt_4
    # HHH
    # >9nt_5
    # SSX
    # >9nt_6
    # IIX
    a = na.new('ATGATGATG')
    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
    assert_equal na.new('TCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
    assert_equal na.new('ATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
  end

  should "test length divisible by 3 remainder 1 forwards" do
    # >10nt_1
    # MMMX
    # >10nt_2
    # ***
    # >10nt_3
    # DDX
    a = na.new('ATGATGATGA')
    assert_equal na.new('ATGATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(1)
    assert_equal na.new('TGATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(2)
    assert_equal na.new('GATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(3)
  end

  should "test length divisible by 3 remainder 1 backwards" do
    # >10nt_4
    # HHH
    # >10nt_5
    # SSSX
    # >10nt_6
    # IIX
    a = na.new('ATGATGATGA')
    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
    assert_equal na.new('TCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
    assert_equal na.new('ATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
  end

  should "test length divisible by 3 remainder 2 forwards" do
    # >11nt_1
    # MMMX
    # >11nt_2
    # ***X
    # >11nt_3
    # DDD
    a = na.new('ATGATGATGAT')
    assert_equal na.new('ATGATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(1)
    assert_equal na.new('TGATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(2)
    assert_equal na.new('GATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(3)
  end

  should "test length divisible by 3 remainder 2 backwards" do
    # >11nt_4
    # HHH
    # >11nt_5
    # SSSX
    # >11nt_6
    # IIIX
    a = na.new('ATGATGATGAT')
    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
    assert_equal na.new('TCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
    assert_equal na.new('ATCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
  end

  # In test/data, 3 nucleotide sequences have been translated by transeq in all
  # 6 frames, giving 18 protein sequences. Translating the nucleotide sequence
  # returned for each frame with bioruby should reproduce the transeq protein.
  should "should align with the transeq translation" do
    nucleotide_sequences = {}
    protein_sequences = {}

    # Read in the files, keyed by FASTA entry id
    Bio::FlatFile.foreach(File.join(DATA_DIR, 'test.fa')) do |seq|
      nucleotide_sequences[seq.entry_id] = seq.seq
    end
    Bio::FlatFile.foreach(File.join(DATA_DIR, 'test_transeq_6frame.fa')) do |seq|
      protein_sequences[seq.entry_id] = seq.seq
    end

    # Make sure enough sequences are being tested
    assert_equal 3 * 6, protein_sequences.length

    # Iterate them all and make sure they match. Protein entry ids look like
    # "<nucleotide id>_<frame>".
    protein_sequences.each do |pname, pseq|
      matches = pname.match(/(.*)_([1-6])/)
      # Fail with a diagnosable message rather than a bare "unhandled exception".
      flunk "unexpected sequence name in transeq output: #{pname.inspect}" unless matches

      # Remove hanging Xs because bioruby and transeq handle a trailing partial
      # codon differently. Non-destructive, so the fixture data is untouched.
      expected = pseq.gsub(/X/, '')
      nucleotides = na.new(nucleotide_sequences[matches[1]])
      assert_equal expected, nucleotides.nucleotide_sequence_of_transeq_translation(matches[2].to_i).translate
    end
  end
end
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-emboss_six_frame_nucleotide_sequences
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Ben J Woodcroft
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-03 00:00:00 +11:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ type: :runtime
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 5
29
+ segments:
30
+ - 1
31
+ - 4
32
+ - 1
33
+ version: 1.4.1
34
+ name: bio
35
+ version_requirements: *id001
36
+ prerelease: false
37
+ - !ruby/object:Gem::Dependency
38
+ type: :development
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ name: shoulda
49
+ version_requirements: *id002
50
+ prerelease: false
51
+ - !ruby/object:Gem::Dependency
52
+ type: :development
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ hash: 23
59
+ segments:
60
+ - 1
61
+ - 0
62
+ - 0
63
+ version: 1.0.0
64
+ name: bundler
65
+ version_requirements: *id003
66
+ prerelease: false
67
+ - !ruby/object:Gem::Dependency
68
+ type: :development
69
+ requirement: &id004 !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ~>
73
+ - !ruby/object:Gem::Version
74
+ hash: 7
75
+ segments:
76
+ - 1
77
+ - 5
78
+ - 2
79
+ version: 1.5.2
80
+ name: jeweler
81
+ version_requirements: *id004
82
+ prerelease: false
83
+ - !ruby/object:Gem::Dependency
84
+ type: :development
85
+ requirement: &id005 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ hash: 3
91
+ segments:
92
+ - 0
93
+ version: "0"
94
+ name: rcov
95
+ version_requirements: *id005
96
+ prerelease: false
97
+ description: a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.
98
+ email: gmail.com after donttrustben
99
+ executables: []
100
+
101
+ extensions: []
102
+
103
+ extra_rdoc_files:
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ files:
107
+ - .document
108
+ - Gemfile
109
+ - LICENSE.txt
110
+ - README.rdoc
111
+ - Rakefile
112
+ - VERSION
113
+ - bio-emboss_six_frame_nucleotide_sequences.gemspec
114
+ - lib/bio-emboss_six_frame_nucleotide_sequences.rb
115
+ - lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb
116
+ - test/data/test.fa
117
+ - test/data/test_transeq_6frame.fa
118
+ - test/helper.rb
119
+ - test/test_bio-emboss_six_frame_nucleotide_sequences.rb
120
+ has_rdoc: true
121
+ homepage: http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences
122
+ licenses:
123
+ - MIT
124
+ post_install_message:
125
+ rdoc_options: []
126
+
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ hash: 3
135
+ segments:
136
+ - 0
137
+ version: "0"
138
+ required_rubygems_version: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ hash: 3
144
+ segments:
145
+ - 0
146
+ version: "0"
147
+ requirements: []
148
+
149
+ rubyforge_project:
150
+ rubygems_version: 1.6.2
151
+ signing_key:
152
+ specification_version: 3
153
+ summary: a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.
154
+ test_files:
155
+ - test/helper.rb
156
+ - test/test_bio-emboss_six_frame_nucleotide_sequences.rb