bio-alignment 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -82,14 +82,14 @@ Write a (simple) version of pal2nal would be something like
82
82
  end
83
83
  ```
84
84
 
85
- With aln1 and aln2, the library version is the shorter
85
+ With amino acid aln1 and nucleotide aln2 loaded, the library version is shorter
86
86
 
87
87
  ```ruby
88
88
  aln3 = aln1.pal2nal(aln2)
89
- fasta3 = FastaWriter.new('nt-aln.fa')
90
- aln3.each { | rec | fasta3.write(rec) }
91
89
  ```
92
90
 
91
+ aln3 now contains the codon alignment.
92
+
93
93
  The API documentation is online. For more code examples see ./spec/*.rb and
94
94
  ./features/*
95
95
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,5 +1,7 @@
1
1
  # Bio-alignment design
2
2
 
3
+ ''A well designed library should be simple and elegant to use...''
4
+
3
5
  ## Introduction
4
6
 
5
7
  Biological multi-sequence alignments (MSA) are normally matrices of
@@ -11,4 +11,3 @@ Feature: Read codon file
11
11
  And it should write a nucleotide alignment
12
12
  And it should write an amino acid alignment
13
13
 
14
-
@@ -39,6 +39,6 @@ Then /^I should be able to generate a codon alignment$/ do
39
39
  end
40
40
 
41
41
  Then /^I should be able to generate a codon alignment directly with pal2nal$/ do
42
- # pal2nal = @aln.pal2nal(@aln1)
43
- pending
42
+ aln3 = @aln.pal2nal(@aln2)
43
+ aln3.sequences[1].to_s.size.should == 1615
44
44
  end
@@ -1,10 +1,13 @@
1
1
  # Alignment
2
2
 
3
+ require 'bio-alignment/pal2nal'
4
+
3
5
  module Bio
4
6
  module BioAlignment
5
7
 
6
8
  class Alignment
7
9
  include Enumerable
10
+ include Pal2Nal
8
11
 
9
12
  attr_accessor :sequences
10
13
 
@@ -4,12 +4,13 @@ require 'bio'
4
4
  module Bio
5
5
  module BioAlignment
6
6
 
7
- CODON_TABLE = Bio::CodonTable[1] # BioRuby Eukaryote table
8
-
9
- # Codon element for the matrix
7
+ # Codon element for the matrix, used by CodonSequence.
10
8
  class Codon
11
- def initialize codon
9
+ attr_reader :codon_table
10
+
11
+ def initialize codon, codon_table = 1
12
12
  @codon = codon
13
+ @codon_table = codon_table
13
14
  end
14
15
 
15
16
  def gap?
@@ -17,7 +18,7 @@ module Bio
17
18
  end
18
19
 
19
20
  def undefined?
20
- aa = CODON_TABLE[@codon]
21
+ aa = translate
21
22
  if aa == nil and not gap?
22
23
  return true
23
24
  end
@@ -30,8 +31,8 @@ module Bio
30
31
 
31
32
  # lazily convert to Amino acid (once only)
32
33
  def to_aa
33
- @aa ||= CODON_TABLE[@codon]
34
- if not @aa
34
+ aa = translate
35
+ if not aa
35
36
  if gap?
36
37
  return '-'
37
38
  elsif undefined?
@@ -40,22 +41,32 @@ module Bio
40
41
  raise 'What?'
41
42
  end
42
43
  end
44
+ aa
45
+ end
46
+
47
+ private
48
+
49
+ def translate
50
+ @aa ||= Bio::CodonTable[@codon_table][@codon]
43
51
  @aa
44
52
  end
45
53
  end
46
54
 
47
55
  # A CodonSequence supports the concept of codons (triple
48
- # nucleotides) for an alignment
56
+ # nucleotides) for an alignment. A codon table number can be passed
57
+ # in for translation of nucleotide sequences. This is the same
58
+ # table used in BioRuby.
49
59
  #
50
60
  class CodonSequence
51
61
  include Enumerable
52
62
 
53
63
  attr_reader :id, :seq
54
- def initialize id, seq
64
+ def initialize id, seq, options = { :codon_table => 1 }
55
65
  @id = id
56
66
  @seq = []
67
+ @codon_table = options[:codon_table]
57
68
  seq.scan(/\S\S\S/).each do | codon |
58
- @seq << Codon.new(codon)
69
+ @seq << Codon.new(codon, @codon_table)
59
70
  end
60
71
  end
61
72
 
@@ -0,0 +1,36 @@
1
+ # pal2nal (protein alignment to nucleotide alignment) implementation in Ruby
2
+
3
+ module Bio
4
+ module BioAlignment
5
+ module Pal2Nal
6
+ def pal2nal nt_aln
7
+ aa_aln = self
8
+ codon_aln = Alignment.new
9
+ aa_aln.each_with_index do | aaseq, i |
10
+ ntseq = nt_aln.sequences[i]
11
+ raise "pal2nal sequence IDs do not match (for #{aaseq.id} != #{ntseq.id})" if aaseq.id != ntseq.id
12
+ raise "pal2nal sequence size does not match (for #{aaseq.id}'s #{aaseq.to_s.size}!= #{ntseq.to_s.size * 3})" if aaseq.id != ntseq.id
13
+ codonseq = CodonSequence.new(ntseq.id, ntseq.seq)
14
+
15
+ codon_pos = 0
16
+ result = []
17
+ aaseq.each do | aa |
18
+ result <<
19
+ if aa.gap?
20
+ '---' # inject codon gap
21
+ else
22
+ codon = codonseq[codon_pos]
23
+ # validate codon translates to amino acid
24
+ raise "codon does not match amino acid (for #{aaseq.id}, position #{codon_pos}, #{codon} translates to #{codon.to_aa} instead of #{aa.to_s})" if codon.to_aa != aa.to_s
25
+ codon_pos += 1
26
+ codon.to_s
27
+ end
28
+ end
29
+ codon_seq = CodonSequence.new(aaseq.id, result.join(''))
30
+ codon_aln.sequences << codon_seq
31
+ end
32
+ codon_aln
33
+ end
34
+ end
35
+ end
36
+ end
@@ -9,6 +9,9 @@ module Bio
9
9
  def gap?
10
10
  @c == '-'
11
11
  end
12
+ def to_s
13
+ @c
14
+ end
12
15
  end
13
16
 
14
17
  # A Sequence is a simple container for String sequences/lists
@@ -3,7 +3,27 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  require 'bigbio'
4
4
  include Bio::BioAlignment # Namespace
5
5
 
6
- describe "BioAlignment" do
6
+ describe "BioAlignment::CodonSequence" do
7
+ it "should support different codon tables" do
8
+ seq = CodonSequence.new("test", "atgcccagacgattgg")
9
+ seq[0].to_aa.should == "M"
10
+ seq[2].to_s.should == "aga"
11
+ seq[2].to_aa.should == "R"
12
+
13
+ seq5 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 5)
14
+ seq5[2].codon_table.should == 5
15
+ seq5[0].to_aa.should == "M"
16
+ seq5[2].to_s.should == "aga"
17
+ seq5[2].to_aa.should == "S"
18
+
19
+ seq2 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 2)
20
+ seq2[2].codon_table.should == 2
21
+ seq2[0].to_aa.should == "M"
22
+ seq2[2].to_aa.should == "*"
23
+ end
24
+ end
25
+
26
+ describe "BioAlignment::Alignment" do
7
27
 
8
28
  it "should allow for adding FastaRecords that contain and id and seq" do
9
29
  aln = Alignment.new
@@ -15,7 +35,7 @@ describe "BioAlignment" do
15
35
  aln.sequences.first.seq[0..15].should == "atgcccactcgattgg"
16
36
  end
17
37
 
18
- it "should allow CodonSequence inputs" do
38
+ it "should allow CodonSequence as an input" do
19
39
  aln = Alignment.new
20
40
  fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
21
41
  fasta.each do | rec |
@@ -29,4 +49,6 @@ describe "BioAlignment" do
29
49
  aln.sequences.first.seq[0].to_aa.should == "M"
30
50
  aln.sequences.first.seq[2].to_aa.should == "T"
31
51
  end
52
+
32
53
  end
54
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-03 00:00:00.000000000Z
12
+ date: 2012-02-04 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio-logger
16
- requirement: &13944060 !ruby/object:Gem::Requirement
16
+ requirement: &23657180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *13944060
24
+ version_requirements: *23657180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bio
27
- requirement: &13932960 !ruby/object:Gem::Requirement
27
+ requirement: &23655840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.4.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *13932960
35
+ version_requirements: *23655840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bio-bigbio
38
- requirement: &13932060 !ruby/object:Gem::Requirement
38
+ requirement: &23654620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>'
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.1.3
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *13932060
46
+ version_requirements: *23654620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: cucumber
49
- requirement: &13930840 !ruby/object:Gem::Requirement
49
+ requirement: &23653020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *13930840
57
+ version_requirements: *23653020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &13929020 !ruby/object:Gem::Requirement
60
+ requirement: &23651800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.3.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *13929020
68
+ version_requirements: *23651800
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &13927920 !ruby/object:Gem::Requirement
71
+ requirement: &23650920 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *13927920
79
+ version_requirements: *23650920
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &13926060 !ruby/object:Gem::Requirement
82
+ requirement: &23650280 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 1.7.0
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *13926060
90
+ version_requirements: *23650280
91
91
  description: Alignment handler for multiple sequence alignments (MSA)
92
92
  email: pjotr.public01@thebird.nl
93
93
  executables:
@@ -113,6 +113,7 @@ files:
113
113
  - lib/bio-alignment.rb
114
114
  - lib/bio-alignment/alignment.rb
115
115
  - lib/bio-alignment/codonsequence.rb
116
+ - lib/bio-alignment/pal2nal.rb
116
117
  - lib/bio-alignment/sequence.rb
117
118
  - spec/bio-alignment_spec.rb
118
119
  - spec/spec_helper.rb
@@ -137,7 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
137
138
  version: '0'
138
139
  segments:
139
140
  - 0
140
- hash: 3814717210769440369
141
+ hash: -3154039158347420524
141
142
  required_rubygems_version: !ruby/object:Gem::Requirement
142
143
  none: false
143
144
  requirements: