bio-alignment 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -82,14 +82,14 @@ Write a (simple) version of pal2nal would be something like
82
82
  end
83
83
  ```
84
84
 
85
- With aln1 and aln2, the library version is the shorter
85
+ With the amino acid alignment aln1 and the nucleotide alignment aln2 loaded, the library version is shorter:
86
86
 
87
87
  ```ruby
88
88
  aln3 = aln1.pal2nal(aln2)
89
- fasta3 = FastaWriter.new('nt-aln.fa')
90
- aln3.each { | rec | fasta3.write(rec) }
91
89
  ```
92
90
 
91
+ aln3 now contains the codon alignment.
92
+
93
93
  The API documentation is online. For more code examples see ./spec/*.rb and
94
94
  ./features/*
95
95
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,5 +1,7 @@
1
1
  # Bio-alignment design
2
2
 
3
+ ''A well designed library should be simple and elegant to use...''
4
+
3
5
  ## Introduction
4
6
 
5
7
  Biological multi-sequence alignments (MSA) are normally matrices of
@@ -11,4 +11,3 @@ Feature: Read codon file
11
11
  And it should write a nucleotide alignment
12
12
  And it should write an amino acid alignment
13
13
 
14
-
@@ -39,6 +39,6 @@ Then /^I should be able to generate a codon alignment$/ do
39
39
  end
40
40
 
41
41
  Then /^I should be able to generate a codon alignment directly with pal2nal$/ do
42
- # pal2nal = @aln.pal2nal(@aln1)
43
- pending
42
+ aln3 = @aln.pal2nal(@aln2)
43
+ aln3.sequences[1].to_s.size.should == 1615
44
44
  end
@@ -1,10 +1,13 @@
1
1
  # Alignment
2
2
 
3
+ require 'bio-alignment/pal2nal'
4
+
3
5
  module Bio
4
6
  module BioAlignment
5
7
 
6
8
  class Alignment
7
9
  include Enumerable
10
+ include Pal2Nal
8
11
 
9
12
  attr_accessor :sequences
10
13
 
@@ -4,12 +4,13 @@ require 'bio'
4
4
  module Bio
5
5
  module BioAlignment
6
6
 
7
- CODON_TABLE = Bio::CodonTable[1] # BioRuby Eukaryote table
8
-
9
- # Codon element for the matrix
7
+ # Codon element for the matrix, used by CodonSequence.
10
8
  class Codon
11
- def initialize codon
9
+ attr_reader :codon_table
10
+
11
+ def initialize codon, codon_table = 1
12
12
  @codon = codon
13
+ @codon_table = codon_table
13
14
  end
14
15
 
15
16
  def gap?
@@ -17,7 +18,7 @@ module Bio
17
18
  end
18
19
 
19
20
  def undefined?
20
- aa = CODON_TABLE[@codon]
21
+ aa = translate
21
22
  if aa == nil and not gap?
22
23
  return true
23
24
  end
@@ -30,8 +31,8 @@ module Bio
30
31
 
31
32
  # lazily convert to Amino acid (once only)
32
33
  def to_aa
33
- @aa ||= CODON_TABLE[@codon]
34
- if not @aa
34
+ aa = translate
35
+ if not aa
35
36
  if gap?
36
37
  return '-'
37
38
  elsif undefined?
@@ -40,22 +41,32 @@ module Bio
40
41
  raise 'What?'
41
42
  end
42
43
  end
44
+ aa
45
+ end
46
+
47
+ private
48
+
49
+ def translate
50
+ @aa ||= Bio::CodonTable[@codon_table][@codon]
43
51
  @aa
44
52
  end
45
53
  end
46
54
 
47
55
  # A CodonSequence supports the concept of codons (triple
48
- # nucleotides) for an alignment
56
+ # nucleotides) for an alignment. A codon table number can be passed
57
+ # in for translation of nucleotide sequences. This is the same
58
+ # table used in BioRuby.
49
59
  #
50
60
  class CodonSequence
51
61
  include Enumerable
52
62
 
53
63
  attr_reader :id, :seq
54
- def initialize id, seq
64
+ def initialize id, seq, options = { :codon_table => 1 }
55
65
  @id = id
56
66
  @seq = []
67
+ @codon_table = options[:codon_table]
57
68
  seq.scan(/\S\S\S/).each do | codon |
58
- @seq << Codon.new(codon)
69
+ @seq << Codon.new(codon, @codon_table)
59
70
  end
60
71
  end
61
72
 
@@ -0,0 +1,36 @@
1
+ # pal2nal (protein alignment to nucleotide alignment) implementation in Ruby
2
+
3
+ module Bio
4
+ module BioAlignment
5
+ module Pal2Nal
6
+ def pal2nal nt_aln
7
+ aa_aln = self
8
+ codon_aln = Alignment.new
9
+ aa_aln.each_with_index do | aaseq, i |
10
+ ntseq = nt_aln.sequences[i]
11
+ raise "pal2nal sequence IDs do not match (for #{aaseq.id} != #{ntseq.id})" if aaseq.id != ntseq.id
12
+ raise "pal2nal sequence size does not match (for #{aaseq.id}'s #{aaseq.to_s.size}!= #{ntseq.to_s.size * 3})" if aaseq.id != ntseq.id
13
+ codonseq = CodonSequence.new(ntseq.id, ntseq.seq)
14
+
15
+ codon_pos = 0
16
+ result = []
17
+ aaseq.each do | aa |
18
+ result <<
19
+ if aa.gap?
20
+ '---' # inject codon gap
21
+ else
22
+ codon = codonseq[codon_pos]
23
+ # validate codon translates to amino acid
24
+ raise "codon does not match amino acid (for #{aaseq.id}, position #{codon_pos}, #{codon} translates to #{codon.to_aa} instead of #{aa.to_s})" if codon.to_aa != aa.to_s
25
+ codon_pos += 1
26
+ codon.to_s
27
+ end
28
+ end
29
+ codon_seq = CodonSequence.new(aaseq.id, result.join(''))
30
+ codon_aln.sequences << codon_seq
31
+ end
32
+ codon_aln
33
+ end
34
+ end
35
+ end
36
+ end
@@ -9,6 +9,9 @@ module Bio
9
9
  def gap?
10
10
  @c == '-'
11
11
  end
12
+ def to_s
13
+ @c
14
+ end
12
15
  end
13
16
 
14
17
  # A Sequence is a simple container for String sequences/lists
@@ -3,7 +3,27 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  require 'bigbio'
4
4
  include Bio::BioAlignment # Namespace
5
5
 
6
- describe "BioAlignment" do
6
+ describe "BioAlignment::CodonSequence" do
7
+ it "should support different codon tables" do
8
+ seq = CodonSequence.new("test", "atgcccagacgattgg")
9
+ seq[0].to_aa.should == "M"
10
+ seq[2].to_s.should == "aga"
11
+ seq[2].to_aa.should == "R"
12
+
13
+ seq5 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 5)
14
+ seq5[2].codon_table.should == 5
15
+ seq5[0].to_aa.should == "M"
16
+ seq5[2].to_s.should == "aga"
17
+ seq5[2].to_aa.should == "S"
18
+
19
+ seq2 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 2)
20
+ seq2[2].codon_table.should == 2
21
+ seq2[0].to_aa.should == "M"
22
+ seq2[2].to_aa.should == "*"
23
+ end
24
+ end
25
+
26
+ describe "BioAlignment::Alignment" do
7
27
 
8
28
  it "should allow for adding FastaRecords that contain and id and seq" do
9
29
  aln = Alignment.new
@@ -15,7 +35,7 @@ describe "BioAlignment" do
15
35
  aln.sequences.first.seq[0..15].should == "atgcccactcgattgg"
16
36
  end
17
37
 
18
- it "should allow CodonSequence inputs" do
38
+ it "should allow CodonSequence as an input" do
19
39
  aln = Alignment.new
20
40
  fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
21
41
  fasta.each do | rec |
@@ -29,4 +49,6 @@ describe "BioAlignment" do
29
49
  aln.sequences.first.seq[0].to_aa.should == "M"
30
50
  aln.sequences.first.seq[2].to_aa.should == "T"
31
51
  end
52
+
32
53
  end
54
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-03 00:00:00.000000000Z
12
+ date: 2012-02-04 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio-logger
16
- requirement: &13944060 !ruby/object:Gem::Requirement
16
+ requirement: &23657180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *13944060
24
+ version_requirements: *23657180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bio
27
- requirement: &13932960 !ruby/object:Gem::Requirement
27
+ requirement: &23655840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.4.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *13932960
35
+ version_requirements: *23655840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bio-bigbio
38
- requirement: &13932060 !ruby/object:Gem::Requirement
38
+ requirement: &23654620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>'
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.1.3
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *13932060
46
+ version_requirements: *23654620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: cucumber
49
- requirement: &13930840 !ruby/object:Gem::Requirement
49
+ requirement: &23653020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *13930840
57
+ version_requirements: *23653020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &13929020 !ruby/object:Gem::Requirement
60
+ requirement: &23651800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.3.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *13929020
68
+ version_requirements: *23651800
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &13927920 !ruby/object:Gem::Requirement
71
+ requirement: &23650920 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *13927920
79
+ version_requirements: *23650920
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &13926060 !ruby/object:Gem::Requirement
82
+ requirement: &23650280 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 1.7.0
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *13926060
90
+ version_requirements: *23650280
91
91
  description: Alignment handler for multiple sequence alignments (MSA)
92
92
  email: pjotr.public01@thebird.nl
93
93
  executables:
@@ -113,6 +113,7 @@ files:
113
113
  - lib/bio-alignment.rb
114
114
  - lib/bio-alignment/alignment.rb
115
115
  - lib/bio-alignment/codonsequence.rb
116
+ - lib/bio-alignment/pal2nal.rb
116
117
  - lib/bio-alignment/sequence.rb
117
118
  - spec/bio-alignment_spec.rb
118
119
  - spec/spec_helper.rb
@@ -137,7 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
137
138
  version: '0'
138
139
  segments:
139
140
  - 0
140
- hash: 3814717210769440369
141
+ hash: -3154039158347420524
141
142
  required_rubygems_version: !ruby/object:Gem::Requirement
142
143
  none: false
143
144
  requirements: