RubyGems - bio-emboss_six_frame_nucleotide_sequences - Versions diffs - 0.1.0 - Mend

bio-emboss_six_frame_nucleotide_sequences 0.1.0

Files changed (14) hide show

data/.document +5 -0
data/Gemfile +14 -0
data/LICENSE.txt +20 -0
data/README.rdoc +21 -0
data/Rakefile +53 -0
data/VERSION +1 -0
data/bio-emboss_six_frame_nucleotide_sequences.gemspec +68 -0
data/lib/bio-emboss_six_frame_nucleotide_sequences.rb +1 -0
data/lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb +39 -0
data/test/data/test.fa +7 -0
data/test/data/test_transeq_6frame.fa +36 -0
data/test/helper.rb +18 -0
data/test/test_bio-emboss_six_frame_nucleotide_sequences.rb +114 -0
metadata +156 -0

data/.document ADDED

@@ -0,0 +1,5 @@
+lib/**/*.rb
+bin/*
+-
+features/**/*.feature
+LICENSE.txt

data/Gemfile ADDED

@@ -0,0 +1,14 @@
+source "http://rubygems.org"
+# Add dependencies required to use your gem here.
+# Example:
+#   gem "activesupport", ">= 2.3.5"
+gem "bio", ">= 1.4.1"
+# Add dependencies to develop your gem here.
+# Include everything needed to run rake, tests, features, etc.
+group :development do
+  gem "shoulda", ">= 0"
+  gem "bundler", "~> 1.0.0"
+  gem "jeweler", "~> 1.5.2"
+  gem "rcov", ">= 0"
+end

data/LICENSE.txt ADDED

@@ -0,0 +1,20 @@
+Copyright (c) 2011 Ben J Woodcroft
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.rdoc ADDED

@@ -0,0 +1,21 @@
+= bio-emboss_six_frame_nucleotide_sequences
+A BioRuby plugin that takes a nucleotide sequence and returns the nucleotide sequence that the EMBOSS utility 'transeq' translated in a given frame. The 3 forward frames are easy enough to work out, but the 3 reverse frames can be somewhat unintuitive, and differ from the frames used by BioRuby itself.
+  Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(4) => Bio::Sequence::NA.new('CATCAT')
+== Contributing to bio-emboss_six_frame_nucleotide_sequences
+* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
+* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
+* Fork the project
+* Start a feature/bugfix branch
+* Commit and push until you are happy with your contribution
+* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
+* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
+== Copyright
+Copyright (c) 2011 Ben J Woodcroft. See LICENSE.txt for
+further details.

data/Rakefile ADDED

@@ -0,0 +1,53 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'rake'
+require 'jeweler'
+Jeweler::Tasks.new do |gem|
+  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
+  gem.name = "bio-emboss_six_frame_nucleotide_sequences"
+  gem.homepage = "http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences"
+  gem.license = "MIT"
+  gem.summary = %Q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
+  gem.description = %Q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
+  gem.email = "gmail.com after donttrustben"
+  gem.authors = ["Ben J Woodcroft"]
+  # Include your dependencies below. Runtime dependencies are required when using your gem,
+  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
+  #  gem.add_runtime_dependency 'jabber4r', '> 0.1'
+  #  gem.add_development_dependency 'rspec', '> 1.2.3'
+end
+Jeweler::RubygemsDotOrgTasks.new
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+require 'rcov/rcovtask'
+Rcov::RcovTask.new do |test|
+  test.libs << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+task :default => :test
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "bio-emboss_six_frame_nucleotide_sequences #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED

	@@ -0,0 +1 @@
1	+ 0.1.0

data/bio-emboss_six_frame_nucleotide_sequences.gemspec ADDED

@@ -0,0 +1,68 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{bio-emboss_six_frame_nucleotide_sequences}
+  s.version = "0.1.0"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Ben J Woodcroft"]
+  s.date = %q{2011-04-03}
+  s.description = %q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
+  s.email = %q{gmail.com after donttrustben}
+  s.extra_rdoc_files = [
+    "LICENSE.txt",
+    "README.rdoc"
+  ]
+  s.files = [
+    ".document",
+    "Gemfile",
+    "LICENSE.txt",
+    "README.rdoc",
+    "Rakefile",
+    "VERSION",
+    "bio-emboss_six_frame_nucleotide_sequences.gemspec",
+    "lib/bio-emboss_six_frame_nucleotide_sequences.rb",
+    "lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb",
+    "test/data/test.fa",
+    "test/data/test_transeq_6frame.fa",
+    "test/helper.rb",
+    "test/test_bio-emboss_six_frame_nucleotide_sequences.rb"
+  ]
+  s.homepage = %q{http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences}
+  s.licenses = ["MIT"]
+  s.require_paths = ["lib"]
+  s.rubygems_version = %q{1.6.2}
+  s.summary = %q{a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.}
+  s.test_files = [
+    "test/helper.rb",
+    "test/test_bio-emboss_six_frame_nucleotide_sequences.rb"
+  ]
+  if s.respond_to? :specification_version then
+    s.specification_version = 3
+    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
+      s.add_runtime_dependency(%q<bio>, [">= 1.4.1"])
+      s.add_development_dependency(%q<shoulda>, [">= 0"])
+      s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
+      s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
+      s.add_development_dependency(%q<rcov>, [">= 0"])
+    else
+      s.add_dependency(%q<bio>, [">= 1.4.1"])
+      s.add_dependency(%q<shoulda>, [">= 0"])
+      s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+      s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
+      s.add_dependency(%q<rcov>, [">= 0"])
+    end
+  else
+    s.add_dependency(%q<bio>, [">= 1.4.1"])
+    s.add_dependency(%q<shoulda>, [">= 0"])
+    s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+    s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
+    s.add_dependency(%q<rcov>, [">= 0"])
+  end
+end

data/lib/bio-emboss_six_frame_nucleotide_sequences.rb ADDED

	@@ -0,0 +1 @@
1	+ require 'bio/sequence/emboss_six_frame_nucleotide_sequences'

data/lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb ADDED

@@ -0,0 +1,39 @@
+require 'bio'
+module Bio
+  class Sequence
+    class NA
+      # Return the nucleotide sequence underlying one translation frame of
+      # this sequence, rather than the amino acid sequence. The frame returned
+      # aligns with the frames that are generated by the EMBOSS package
+      # program transeq. Note that these are different than the translations
+      # generated by bioruby itself.
+      #
+      # frame:: Integer frame number. 1, 2 and 3 are the forward frames;
+      #         4, 5 and 6 are the reverse frames. -1, -2 and -3 are accepted
+      #         as aliases for 4, 5 and 6 respectively.
+      #
+      # Returns a new Bio::Sequence::NA. For reverse frames this is the
+      # reverse complement of the relevant part of the sequence. Raises
+      # Exception when +frame+ is not one of the values listed above.
+      #
+      #   Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(1) => Bio::Sequence::NA.new('ATGATG')
+      #   Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(2) => Bio::Sequence::NA.new('TGATG')
+      #   Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(4) => Bio::Sequence::NA.new('CATCAT')
+      #   Bio::Sequence::NA.new('ATGATG').nucleotide_sequence_of_transeq_translation(-1) => Bio::Sequence::NA.new('CATCAT')
+      def nucleotide_sequence_of_transeq_translation(frame)
+        # The exception class is kept as Exception so that existing callers
+        # rescuing or asserting on it keep working.
+        unless [-1,-2,-3,1,2,3,4,5,6].include?(frame) #error checking
+          raise Exception, "unexpected frame for translation: `#{frame.inspect}'"
+        end
+
+        # Translate negatives into positives for reverse sequences
+        # (-1 => 4, -2 => 5, -3 => 6). This has to happen before the forward
+        # check below, otherwise negative frames are mistaken for forward
+        # frames because they are also less than 4.
+        frame = 3 - frame if frame < 0
+
+        # Deal with the easy case of translating in the forward direction:
+        # drop the first frame-1 bases. The slice is nil when the sequence is
+        # shorter than that, so to_s turns it into an empty sequence.
+        if frame < 4
+          return Bio::Sequence::NA.new(self[(frame - 1)..-1].to_s)
+        end
+
+        # Offset table for reverse frames, indexed by length%3, then frame-4.
+        # Each entry is the (non-positive) number of bases trimmed from the
+        # 3' end of the forward sequence before reverse complementing.
+        offset_table = [[0,-2,-1],[-1,0,-2],[-2,-1,0]]
+        offset = offset_table[length % 3][frame - 4]
+
+        # Wrap the slice in NA before calling reverse_complement: on Ruby 3.0
+        # and later String#[] returns a plain String even for subclasses, and
+        # a plain String has no reverse_complement.
+        trimmed = Bio::Sequence::NA.new(self[0..length-1+offset].to_s)
+        return Bio::Sequence::NA.new(trimmed.reverse_complement)
+      end
+    end
+  end
+end

data/test/data/test.fa ADDED

@@ -0,0 +1,7 @@
+>9nt
+ATGATGATG
+>10nt
+ATGATGATGA
+>11nt
+ATGATGATGAT

data/test/data/test_transeq_6frame.fa ADDED

@@ -0,0 +1,36 @@
+>9nt_1
+MMM
+>9nt_2
+**X
+>9nt_3
+DDX
+>9nt_4
+HHH
+>9nt_5
+SSX
+>9nt_6
+IIX
+>10nt_1
+MMMX
+>10nt_2
+***
+>10nt_3
+DDX
+>10nt_4
+HHH
+>10nt_5
+SSSX
+>10nt_6
+IIX
+>11nt_1
+MMMX
+>11nt_2
+***X
+>11nt_3
+DDD
+>11nt_4
+HHH
+>11nt_5
+SSSX
+>11nt_6
+IIIX

data/test/helper.rb ADDED

@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'test/unit'
+require 'shoulda'
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'bio-emboss_six_frame_nucleotide_sequences'
+class Test::Unit::TestCase
+end

data/test/test_bio-emboss_six_frame_nucleotide_sequences.rb ADDED

@@ -0,0 +1,114 @@
+require 'helper'
+class TestBioEmbossSixFrameNucleotideSequences < Test::Unit::TestCase
+  na = Bio::Sequence::NA
+  @@data_dir = File.join(File.dirname(__FILE__), 'data')
+  should "test length divisible by 3 forwards" do
+  # >9nt_1
+  # MMM
+  # >9nt_2
+  # **X
+  # >9nt_3
+  # DDX
+    a = na.new('ATGATGATG')
+    assert_equal na.new('ATGATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(1)
+    assert_equal na.new('TGATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(2)
+    assert_equal na.new('GATGATG').downcase, a.nucleotide_sequence_of_transeq_translation(3)
+  end
+  # >9nt_4
+  # HHH
+  # >9nt_5
+  # SSX
+  # >9nt_6
+  # IIX
+  should "test length divisible by 3 backwards" do
+    a = na.new('ATGATGATG')
+    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
+    assert_equal na.new('TCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
+    assert_equal na.new('ATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
+  end
+  should "test length divisible by 3 remainder 1 forwards" do
+  # >10nt_1
+  # MMMX
+  # >10nt_2
+  # ***
+  # >10nt_3
+  # DDX
+    a = na.new('ATGATGATGA')
+    assert_equal na.new('ATGATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(1)
+    assert_equal na.new('TGATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(2)
+    assert_equal na.new('GATGATGA').downcase, a.nucleotide_sequence_of_transeq_translation(3)
+  end
+  # >10nt_4
+  # HHH
+  # >10nt_5
+  # SSSX
+  # >10nt_6
+  # IIX
+  should "test length divisible by 3 remainder 1 backwards" do
+    a = na.new('ATGATGATGA')
+    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
+    assert_equal na.new('TCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
+    assert_equal na.new('ATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
+  end
+  should "test length divisible by 3 remainder 2 forwards" do
+  # >11nt_1
+  # MMMX
+  # >11nt_2
+  # ***X
+  # >11nt_3
+  # DDD
+    a = na.new('ATGATGATGAT')
+    assert_equal na.new('ATGATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(1)
+    assert_equal na.new('TGATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(2)
+    assert_equal na.new('GATGATGAT').downcase, a.nucleotide_sequence_of_transeq_translation(3)
+  end
+  # >11nt_4
+  # HHH
+  # >11nt_5
+  # SSSX
+  # >11nt_6
+  # IIIX
+  should "test length divisible by 3 remainder 2 backwards" do
+    a = na.new('ATGATGATGAT')
+    assert_equal na.new('CATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(4)
+    assert_equal na.new('TCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(5)
+    assert_equal na.new('ATCATCATCAT').downcase, a.nucleotide_sequence_of_transeq_translation(6)
+  end
+  # in test/data, 3 nucleotide sequences have each been translated by transeq
+  # in all 6 frames, giving 18 protein sequences. They should match the bioruby
+  # translations of the nucleotide sequences returned for the same frames
+  should "should align with the transeq translation" do
+    nucleotide_sequences = {}
+    protein_sequences = {}
+    # Read in the files
+    Bio::FlatFile.foreach(File.join(@@data_dir,'test.fa')) do |seq|
+      nucleotide_sequences[seq.entry_id] = seq.seq
+    end
+    Bio::FlatFile.foreach(File.join(@@data_dir,'test_transeq_6frame.fa')) do |seq|
+      protein_sequences[seq.entry_id] = seq.seq
+    end
+    # Make sure enough sequences are being tested
+    assert_equal 3*6, protein_sequences.length
+    # iterate them all and make sure they match
+    protein_sequences.each do |pname, pseq|
+      if matches = pname.match(/(.*)_([1-6])/)
+        pseq.gsub!(/X/,'') #remove hanging Xs cos bioruby and transeq do that
+        # differently
+        assert_equal pseq, na.new(nucleotide_sequences[matches[1]]).nucleotide_sequence_of_transeq_translation(matches[2].to_i).translate
+      else
+        raise
+      end
+    end
+  end
+end

metadata ADDED

@@ -0,0 +1,156 @@
+--- !ruby/object:Gem::Specification
+name: bio-emboss_six_frame_nucleotide_sequences
+version: !ruby/object:Gem::Version
+  hash: 27
+  prerelease:
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- Ben J Woodcroft
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-04-03 00:00:00 +11:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  type: :runtime
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 5
+        segments:
+        - 1
+        - 4
+        - 1
+        version: 1.4.1
+  name: bio
+  version_requirements: *id001
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  name: shoulda
+  version_requirements: *id002
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 23
+        segments:
+        - 1
+        - 0
+        - 0
+        version: 1.0.0
+  name: bundler
+  version_requirements: *id003
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 7
+        segments:
+        - 1
+        - 5
+        - 2
+        version: 1.5.2
+  name: jeweler
+  version_requirements: *id004
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id005 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  name: rcov
+  version_requirements: *id005
+  prerelease: false
+description: a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.
+email: gmail.com after donttrustben
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE.txt
+- README.rdoc
+files:
+- .document
+- Gemfile
+- LICENSE.txt
+- README.rdoc
+- Rakefile
+- VERSION
+- bio-emboss_six_frame_nucleotide_sequences.gemspec
+- lib/bio-emboss_six_frame_nucleotide_sequences.rb
+- lib/bio/sequence/emboss_six_frame_nucleotide_sequences.rb
+- test/data/test.fa
+- test/data/test_transeq_6frame.fa
+- test/helper.rb
+- test/test_bio-emboss_six_frame_nucleotide_sequences.rb
+has_rdoc: true
+homepage: http://github.com/wwood/bioruby-emboss_six_frame_nucleotide_sequences
+licenses:
+- MIT
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.6.2
+signing_key:
+specification_version: 3
+summary: a method to get the nucleotide sequence of translations done by the EMBOSS bioinformatics package program transeq.
+test_files:
+- test/helper.rb
+- test/test_bio-emboss_six_frame_nucleotide_sequences.rb