bio-alignment 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +3 -3
- data/VERSION +1 -1
- data/doc/bio-alignment-design.md +2 -0
- data/features/codon.feature +0 -1
- data/features/pal2nal-feature.rb +2 -2
- data/lib/bio-alignment/alignment.rb +3 -0
- data/lib/bio-alignment/codonsequence.rb +21 -10
- data/lib/bio-alignment/pal2nal.rb +36 -0
- data/lib/bio-alignment/sequence.rb +3 -0
- data/spec/bio-alignment_spec.rb +24 -2
- metadata +18 -17
data/README.md
CHANGED
@@ -82,14 +82,14 @@ Write a (simple) version of pal2nal would be something like
|
|
82
82
|
end
|
83
83
|
```
|
84
84
|
|
85
|
-
With aln1 and aln2, the library version is the shorter
|
85
|
+
With amino acid aln1 and nucleotide aln2 loaded, the library version is the shorter
|
86
86
|
|
87
87
|
```ruby
|
88
88
|
aln3 = aln1.pal2nal(aln2)
|
89
|
-
fasta3 = FastaWriter.new('nt-aln.fa')
|
90
|
-
aln3.each { | rec | fasta3.write(rec) }
|
91
89
|
```
|
92
90
|
|
91
|
+
aln3 contains the codon alignment.
|
92
|
+
|
93
93
|
The API documentation is online. For more code examples see ./spec/*.rb and
|
94
94
|
./features/*
|
95
95
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/doc/bio-alignment-design.md
CHANGED
data/features/codon.feature
CHANGED
data/features/pal2nal-feature.rb
CHANGED
@@ -39,6 +39,6 @@ Then /^I should be able to generate a codon alignment$/ do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
Then /^I should be able to generate a codon alignment directly with pal2nal$/ do
|
42
|
-
|
43
|
-
|
42
|
+
aln3 = @aln.pal2nal(@aln2)
|
43
|
+
aln3.sequences[1].to_s.size.should == 1615
|
44
44
|
end
|
@@ -4,12 +4,13 @@ require 'bio'
|
|
4
4
|
module Bio
|
5
5
|
module BioAlignment
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
# Codon element for the matrix
|
7
|
+
# Codon element for the matrix, used by CodonSequence.
|
10
8
|
class Codon
|
11
|
-
|
9
|
+
attr_reader :codon_table
|
10
|
+
|
11
|
+
def initialize codon, codon_table = 1
|
12
12
|
@codon = codon
|
13
|
+
@codon_table = codon_table
|
13
14
|
end
|
14
15
|
|
15
16
|
def gap?
|
@@ -17,7 +18,7 @@ module Bio
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def undefined?
|
20
|
-
aa =
|
21
|
+
aa = translate
|
21
22
|
if aa == nil and not gap?
|
22
23
|
return true
|
23
24
|
end
|
@@ -30,8 +31,8 @@ module Bio
|
|
30
31
|
|
31
32
|
# lazily convert to Amino acid (once only)
|
32
33
|
def to_aa
|
33
|
-
|
34
|
-
if not
|
34
|
+
aa = translate
|
35
|
+
if not aa
|
35
36
|
if gap?
|
36
37
|
return '-'
|
37
38
|
elsif undefined?
|
@@ -40,22 +41,32 @@ module Bio
|
|
40
41
|
raise 'What?'
|
41
42
|
end
|
42
43
|
end
|
44
|
+
aa
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def translate
|
50
|
+
@aa ||= Bio::CodonTable[@codon_table][@codon]
|
43
51
|
@aa
|
44
52
|
end
|
45
53
|
end
|
46
54
|
|
47
55
|
# A CodonSequence supports the concept of codons (triple
|
48
|
-
# nucleotides) for an alignment
|
56
|
+
# nucleotides) for an alignment. A codon table number can be passed
|
57
|
+
# in for translation of nucleotide sequences. This is the same
|
58
|
+
# table used in BioRuby.
|
49
59
|
#
|
50
60
|
class CodonSequence
|
51
61
|
include Enumerable
|
52
62
|
|
53
63
|
attr_reader :id, :seq
|
54
|
-
def initialize id, seq
|
64
|
+
def initialize id, seq, options = { :codon_table => 1 }
|
55
65
|
@id = id
|
56
66
|
@seq = []
|
67
|
+
@codon_table = options[:codon_table]
|
57
68
|
seq.scan(/\S\S\S/).each do | codon |
|
58
|
-
@seq << Codon.new(codon)
|
69
|
+
@seq << Codon.new(codon, @codon_table)
|
59
70
|
end
|
60
71
|
end
|
61
72
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# pal2nal (protein alignment to nucleotide alignment) implementation in Ruby
|
2
|
+
|
3
|
+
module Bio
|
4
|
+
module BioAlignment
|
5
|
+
module Pal2Nal
|
6
|
+
def pal2nal nt_aln
|
7
|
+
aa_aln = self
|
8
|
+
codon_aln = Alignment.new
|
9
|
+
aa_aln.each_with_index do | aaseq, i |
|
10
|
+
ntseq = nt_aln.sequences[i]
|
11
|
+
raise "pal2nal sequence IDs do not match (for #{aaseq.id} != #{ntseq.id})" if aaseq.id != ntseq.id
|
12
|
+
raise "pal2nal sequence size does not match (for #{aaseq.id}'s #{aaseq.to_s.size}!= #{ntseq.to_s.size * 3})" if aaseq.id != ntseq.id
|
13
|
+
codonseq = CodonSequence.new(ntseq.id, ntseq.seq)
|
14
|
+
|
15
|
+
codon_pos = 0
|
16
|
+
result = []
|
17
|
+
aaseq.each do | aa |
|
18
|
+
result <<
|
19
|
+
if aa.gap?
|
20
|
+
'---' # inject codon gap
|
21
|
+
else
|
22
|
+
codon = codonseq[codon_pos]
|
23
|
+
# validate codon translates to amino acid
|
24
|
+
raise "codon does not match amino acid (for #{aaseq.id}, position #{codon_pos}, #{codon} translates to #{codon.to_aa} instead of #{aa.to_s})" if codon.to_aa != aa.to_s
|
25
|
+
codon_pos += 1
|
26
|
+
codon.to_s
|
27
|
+
end
|
28
|
+
end
|
29
|
+
codon_seq = CodonSequence.new(aaseq.id, result.join(''))
|
30
|
+
codon_aln.sequences << codon_seq
|
31
|
+
end
|
32
|
+
codon_aln
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/spec/bio-alignment_spec.rb
CHANGED
@@ -3,7 +3,27 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
require 'bigbio'
|
4
4
|
include Bio::BioAlignment # Namespace
|
5
5
|
|
6
|
-
describe "BioAlignment" do
|
6
|
+
describe "BioAlignment::CodonSequence" do
|
7
|
+
it "should support different codon tables" do
|
8
|
+
seq = CodonSequence.new("test", "atgcccagacgattgg")
|
9
|
+
seq[0].to_aa.should == "M"
|
10
|
+
seq[2].to_s.should == "aga"
|
11
|
+
seq[2].to_aa.should == "R"
|
12
|
+
|
13
|
+
seq5 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 5)
|
14
|
+
seq5[2].codon_table.should == 5
|
15
|
+
seq5[0].to_aa.should == "M"
|
16
|
+
seq5[2].to_s.should == "aga"
|
17
|
+
seq5[2].to_aa.should == "S"
|
18
|
+
|
19
|
+
seq2 = CodonSequence.new("test", "atgcccagacgattgg", :codon_table => 2)
|
20
|
+
seq2[2].codon_table.should == 2
|
21
|
+
seq2[0].to_aa.should == "M"
|
22
|
+
seq2[2].to_aa.should == "*"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe "BioAlignment::Alignment" do
|
7
27
|
|
8
28
|
it "should allow for adding FastaRecords that contain and id and seq" do
|
9
29
|
aln = Alignment.new
|
@@ -15,7 +35,7 @@ describe "BioAlignment" do
|
|
15
35
|
aln.sequences.first.seq[0..15].should == "atgcccactcgattgg"
|
16
36
|
end
|
17
37
|
|
18
|
-
it "should allow CodonSequence
|
38
|
+
it "should allow CodonSequence as an input" do
|
19
39
|
aln = Alignment.new
|
20
40
|
fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
|
21
41
|
fasta.each do | rec |
|
@@ -29,4 +49,6 @@ describe "BioAlignment" do
|
|
29
49
|
aln.sequences.first.seq[0].to_aa.should == "M"
|
30
50
|
aln.sequences.first.seq[2].to_aa.should == "T"
|
31
51
|
end
|
52
|
+
|
32
53
|
end
|
54
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-04 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-logger
|
16
|
-
requirement: &
|
16
|
+
requirement: &23657180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23657180
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio
|
27
|
-
requirement: &
|
27
|
+
requirement: &23655840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.4.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *23655840
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bio-bigbio
|
38
|
-
requirement: &
|
38
|
+
requirement: &23654620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>'
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.3
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *23654620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: cucumber
|
49
|
-
requirement: &
|
49
|
+
requirement: &23653020 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *23653020
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &23651800 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 2.3.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *23651800
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: bundler
|
71
|
-
requirement: &
|
71
|
+
requirement: &23650920 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.0.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *23650920
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: jeweler
|
82
|
-
requirement: &
|
82
|
+
requirement: &23650280 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: 1.7.0
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *23650280
|
91
91
|
description: Alignment handler for multiple sequence alignments (MSA)
|
92
92
|
email: pjotr.public01@thebird.nl
|
93
93
|
executables:
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- lib/bio-alignment.rb
|
114
114
|
- lib/bio-alignment/alignment.rb
|
115
115
|
- lib/bio-alignment/codonsequence.rb
|
116
|
+
- lib/bio-alignment/pal2nal.rb
|
116
117
|
- lib/bio-alignment/sequence.rb
|
117
118
|
- spec/bio-alignment_spec.rb
|
118
119
|
- spec/spec_helper.rb
|
@@ -137,7 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
138
|
version: '0'
|
138
139
|
segments:
|
139
140
|
- 0
|
140
|
-
hash:
|
141
|
+
hash: -3154039158347420524
|
141
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
143
|
none: false
|
143
144
|
requirements:
|