bio-alignment 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -3
- data/VERSION +1 -1
- data/doc/bio-alignment-design.md +2 -0
- data/features/codon.feature +0 -1
- data/features/pal2nal-feature.rb +2 -2
- data/lib/bio-alignment/alignment.rb +3 -0
- data/lib/bio-alignment/codonsequence.rb +21 -10
- data/lib/bio-alignment/pal2nal.rb +36 -0
- data/lib/bio-alignment/sequence.rb +3 -0
- data/spec/bio-alignment_spec.rb +24 -2
- metadata +18 -17
data/README.md
CHANGED
@@ -82,14 +82,14 @@ Write a (simple) version of pal2nal would be something like
|
|
82
82
|
end
|
83
83
|
```
|
84
84
|
|
85
|
-
With aln1 and aln2, the library version is the shorter
|
85
|
+
With the amino acid alignment aln1 and the nucleotide alignment aln2 loaded, the library version is shorter:
|
86
86
|
|
87
87
|
```ruby
|
88
88
|
aln3 = aln1.pal2nal(aln2)
|
89
|
-
fasta3 = FastaWriter.new('nt-aln.fa')
|
90
|
-
aln3.each { | rec | fasta3.write(rec) }
|
91
89
|
```
|
92
90
|
|
91
|
+
aln3 now contains the codon alignment.
|
92
|
+
|
93
93
|
The API documentation is online. For more code examples see ./spec/*.rb and
|
94
94
|
./features/*
|
95
95
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/doc/bio-alignment-design.md
CHANGED
data/features/codon.feature
CHANGED
data/features/pal2nal-feature.rb
CHANGED
@@ -39,6 +39,6 @@ Then /^I should be able to generate a codon alignment$/ do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
Then /^I should be able to generate a codon alignment directly with pal2nal$/ do
|
42
|
-
|
43
|
-
|
42
|
+
aln3 = @aln.pal2nal(@aln2)
|
43
|
+
aln3.sequences[1].to_s.size.should == 1615
|
44
44
|
end
|
@@ -4,12 +4,13 @@ require 'bio'
|
|
4
4
|
module Bio
|
5
5
|
module BioAlignment
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
# Codon element for the matrix
|
7
|
+
# Codon element for the matrix, used by CodonSequence.
|
10
8
|
class Codon
|
11
|
-
|
9
|
+
attr_reader :codon_table
|
10
|
+
|
11
|
+
def initialize codon, codon_table = 1
|
12
12
|
@codon = codon
|
13
|
+
@codon_table = codon_table
|
13
14
|
end
|
14
15
|
|
15
16
|
def gap?
|
@@ -17,7 +18,7 @@ module Bio
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def undefined?
|
20
|
-
aa =
|
21
|
+
aa = translate
|
21
22
|
if aa == nil and not gap?
|
22
23
|
return true
|
23
24
|
end
|
@@ -30,8 +31,8 @@ module Bio
|
|
30
31
|
|
31
32
|
# lazily convert to Amino acid (once only)
|
32
33
|
def to_aa
|
33
|
-
|
34
|
-
if not
|
34
|
+
aa = translate
|
35
|
+
if not aa
|
35
36
|
if gap?
|
36
37
|
return '-'
|
37
38
|
elsif undefined?
|
@@ -40,22 +41,32 @@ module Bio
|
|
40
41
|
raise 'What?'
|
41
42
|
end
|
42
43
|
end
|
44
|
+
aa
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def translate
|
50
|
+
@aa ||= Bio::CodonTable[@codon_table][@codon]
|
43
51
|
@aa
|
44
52
|
end
|
45
53
|
end
|
46
54
|
|
47
55
|
# A CodonSequence supports the concept of codons (triple
|
48
|
-
# nucleotides) for an alignment
|
56
|
+
# nucleotides) for an alignment. A codon table number can be passed
|
57
|
+
# in for translation of nucleotide sequences. This is the same
|
58
|
+
# table used in BioRuby.
|
49
59
|
#
|
50
60
|
class CodonSequence
|
51
61
|
include Enumerable
|
52
62
|
|
53
63
|
attr_reader :id, :seq
|
54
|
-
def initialize id, seq
|
64
|
+
def initialize id, seq, options = { :codon_table => 1 }
|
55
65
|
@id = id
|
56
66
|
@seq = []
|
67
|
+
@codon_table = options[:codon_table]
|
57
68
|
seq.scan(/\S\S\S/).each do | codon |
|
58
|
-
@seq << Codon.new(codon)
|
69
|
+
@seq << Codon.new(codon, @codon_table)
|
59
70
|
end
|
60
71
|
end
|
61
72
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# pal2nal (protein alignment to nucleotide alignment) implementation in Ruby
|
2
|
+
|
3
|
+
module Bio
|
4
|
+
module BioAlignment
|
5
|
+
module Pal2Nal
|
6
|
+
def pal2nal nt_aln
|
7
|
+
aa_aln = self
|
8
|
+
codon_aln = Alignment.new
|
9
|
+
aa_aln.each_with_index do | aaseq, i |
|
10
|
+
ntseq = nt_aln.sequences[i]
|
11
|
+
raise "pal2nal sequence IDs do not match (for #{aaseq.id} != #{ntseq.id})" if aaseq.id != ntseq.id
|
12
|
+
raise "pal2nal sequence size does not match (for #{aaseq.id}'s #{aaseq.to_s.size}!= #{ntseq.to_s.size * 3})" if aaseq.id != ntseq.id
|
13
|
+
codonseq = CodonSequence.new(ntseq.id, ntseq.seq)
|
14
|
+
|
15
|
+
codon_pos = 0
|
16
|
+
result = []
|
17
|
+
aaseq.each do | aa |
|
18
|
+
result <<
|
19
|
+
if aa.gap?
|
20
|
+
'---' # inject codon gap
|
21
|
+
else
|
22
|
+
codon = codonseq[codon_pos]
|
23
|
+
# validate codon translates to amino acid
|
24
|
+
raise "codon does not match amino acid (for #{aaseq.id}, position #{codon_pos}, #{codon} translates to #{codon.to_aa} instead of #{aa.to_s})" if codon.to_aa != aa.to_s
|
25
|
+
codon_pos += 1
|
26
|
+
codon.to_s
|
27
|
+
end
|
28
|
+
end
|
29
|
+
codon_seq = CodonSequence.new(aaseq.id, result.join(''))
|
30
|
+
codon_aln.sequences << codon_seq
|
31
|
+
end
|
32
|
+
codon_aln
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/spec/bio-alignment_spec.rb
CHANGED
@@ -3,7 +3,27 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
require 'bigbio'
|
4
4
|
include Bio::BioAlignment # Namespace
|
5
5
|
|
6
|
-
describe "BioAlignment" do
|
6
|
+
describe "BioAlignment::CodonSequence" do
|
7
|
+
it "should support different codon tables" do
|
8
|
+
seq = CodonSequence.new("test", "atgcccagacgattgg")
|
9
|
+
seq[0].to_aa.should == "M"
|
10
|
+
seq[2].to_s.should == "aga"
|
11
|
+
seq[2].to_aa.should == "R"
|
12
|
+
|
13
|
+
seq5 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 5)
|
14
|
+
seq5[2].codon_table.should == 5
|
15
|
+
seq5[0].to_aa.should == "M"
|
16
|
+
seq5[2].to_s.should == "aga"
|
17
|
+
seq5[2].to_aa.should == "S"
|
18
|
+
|
19
|
+
seq2 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 2)
|
20
|
+
seq2[2].codon_table.should == 2
|
21
|
+
seq2[0].to_aa.should == "M"
|
22
|
+
seq2[2].to_aa.should == "*"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe "BioAlignment::Alignment" do
|
7
27
|
|
8
28
|
it "should allow for adding FastaRecords that contain and id and seq" do
|
9
29
|
aln = Alignment.new
|
@@ -15,7 +35,7 @@ describe "BioAlignment" do
|
|
15
35
|
aln.sequences.first.seq[0..15].should == "atgcccactcgattgg"
|
16
36
|
end
|
17
37
|
|
18
|
-
it "should allow CodonSequence
|
38
|
+
it "should allow CodonSequence as an input" do
|
19
39
|
aln = Alignment.new
|
20
40
|
fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
|
21
41
|
fasta.each do | rec |
|
@@ -29,4 +49,6 @@ describe "BioAlignment" do
|
|
29
49
|
aln.sequences.first.seq[0].to_aa.should == "M"
|
30
50
|
aln.sequences.first.seq[2].to_aa.should == "T"
|
31
51
|
end
|
52
|
+
|
32
53
|
end
|
54
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-04 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-logger
|
16
|
-
requirement: &
|
16
|
+
requirement: &23657180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23657180
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio
|
27
|
-
requirement: &
|
27
|
+
requirement: &23655840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.4.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *23655840
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bio-bigbio
|
38
|
-
requirement: &
|
38
|
+
requirement: &23654620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>'
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.3
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *23654620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: cucumber
|
49
|
-
requirement: &
|
49
|
+
requirement: &23653020 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *23653020
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &23651800 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 2.3.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *23651800
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: bundler
|
71
|
-
requirement: &
|
71
|
+
requirement: &23650920 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.0.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *23650920
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: jeweler
|
82
|
-
requirement: &
|
82
|
+
requirement: &23650280 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: 1.7.0
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *23650280
|
91
91
|
description: Alignment handler for multiple sequence alignments (MSA)
|
92
92
|
email: pjotr.public01@thebird.nl
|
93
93
|
executables:
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- lib/bio-alignment.rb
|
114
114
|
- lib/bio-alignment/alignment.rb
|
115
115
|
- lib/bio-alignment/codonsequence.rb
|
116
|
+
- lib/bio-alignment/pal2nal.rb
|
116
117
|
- lib/bio-alignment/sequence.rb
|
117
118
|
- spec/bio-alignment_spec.rb
|
118
119
|
- spec/spec_helper.rb
|
@@ -137,7 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
138
|
version: '0'
|
138
139
|
segments:
|
139
140
|
- 0
|
140
|
-
hash:
|
141
|
+
hash: -3154039158347420524
|
141
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
143
|
none: false
|
143
144
|
requirements:
|