bio-alignment 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/Gemfile +5 -4
  2. data/README.md +94 -9
  3. data/Rakefile +2 -1
  4. data/VERSION +1 -1
  5. data/doc/bio-alignment-design.md +75 -11
  6. data/features/bioruby-feature.rb +17 -0
  7. data/features/bioruby.feature +6 -1
  8. data/features/columns-feature.rb +2 -0
  9. data/features/edit/del_bridges-feature.rb +7 -3
  10. data/features/edit/del_bridges.feature +1 -2
  11. data/features/edit/del_non_informative_sequences-feature.rb +26 -0
  12. data/features/edit/del_non_informative_sequences.feature +19 -0
  13. data/features/edit/del_short_sequences-feature.rb +21 -0
  14. data/features/edit/del_short_sequences.feature +25 -0
  15. data/features/edit/gblocks-feature.rb +2 -2
  16. data/features/edit/mask_islands-feature.rb +17 -4
  17. data/features/edit/mask_islands.feature +28 -17
  18. data/features/edit/mask_serial_mutations-feature.rb +8 -6
  19. data/features/edit/mask_serial_mutations.feature +11 -11
  20. data/features/tree-feature.rb +66 -0
  21. data/features/tree.feature +45 -0
  22. data/lib/bio-alignment.rb +4 -1
  23. data/lib/bio-alignment/alignment.rb +58 -3
  24. data/lib/bio-alignment/codonsequence.rb +14 -2
  25. data/lib/bio-alignment/columns.rb +102 -0
  26. data/lib/bio-alignment/edit/del_bridges.rb +18 -1
  27. data/lib/bio-alignment/edit/del_non_informative_sequences.rb +27 -0
  28. data/lib/bio-alignment/edit/del_short_sequences.rb +28 -0
  29. data/lib/bio-alignment/edit/edit_columns.rb +22 -0
  30. data/lib/bio-alignment/edit/edit_rows.rb +49 -0
  31. data/lib/bio-alignment/edit/mask_islands.rb +115 -0
  32. data/lib/bio-alignment/edit/mask_serial_mutations.rb +44 -0
  33. data/lib/bio-alignment/elements.rb +86 -0
  34. data/lib/bio-alignment/rows.rb +52 -0
  35. data/lib/bio-alignment/sequence.rb +20 -14
  36. data/lib/bio-alignment/state.rb +64 -8
  37. data/lib/bio-alignment/tree.rb +77 -0
  38. data/spec/bio-alignment_spec.rb +57 -1
  39. data/spec/spec_helper.rb +3 -3
  40. metadata +47 -22
  41. data/lib/bio-alignment/column.rb +0 -47
@@ -0,0 +1,25 @@
1
+ Feature: Alignment editing; remove short sequences
2
+ Remove rows that are too short (short sequences)
3
+
4
+ The dropped sequences are tracked by the row state objects
5
+
6
+ Scenario: Apply short sequence rule to an amino acid alignment
7
+ Given I have a bridged alignment
8
+ """
9
+ SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
10
+ SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
11
+ ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
12
+ ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
13
+ -------------IFHAVR-TC-HP-----------------
14
+ """
15
+ When I apply the short sequence rule
16
+ Then it should have removed one row
17
+ """
18
+ SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
19
+ SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
20
+ ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
21
+ ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
22
+ """
23
+ Then I should be able to track removed rows
24
+
25
+
@@ -1,5 +1,5 @@
1
1
  When /^I apply GBlocks$/ do
2
- pending # express the regexp above with the code you wish you had
2
+ # pending # express the regexp above with the code you wish you had
3
3
  end
4
4
 
5
5
  Then /^it should return the GBlocks cleaned alignment$/ do
@@ -7,7 +7,7 @@ Then /^it should return the GBlocks cleaned alignment$/ do
7
7
  end
8
8
 
9
9
  Then /^return a list of removed columns$/ do
10
- pending # express the regexp above with the code you wish you had
10
+ # pending # express the regexp above with the code you wish you had
11
11
  end
12
12
 
13
13
 
@@ -1,12 +1,25 @@
1
1
  require 'bio-alignment'
2
+ require 'bio-alignment/edit/mask_islands'
2
3
 
3
- When /^I apply island rule with max_gap_size (\d+)$/ do |arg1|
4
- pending # express the regexp above with the code you wish you had
4
+
5
+ Given /^I have an alignment with islands$/ do |string|
6
+ @aln = Alignment.new(string.split(/\n/))
5
7
  end
6
8
 
7
- Then /^it should result in$/ do |string|
8
- pending # express the regexp above with the code you wish you had
9
+ When /^I apply island rule with max_gap_size (\d+)$/ do |arg1|
10
+ @aln.extend MaskIslands
11
+ @marked_aln = @aln.mark_islands
9
12
  end
10
13
 
14
+ Then /^it should have masked islands$/ do |string|
15
+ check_aln = Alignment.new(string.split(/\n/))
16
+ new_aln = @marked_aln.update_each_element { |e| (e.state.masked? ? Element.new("X"):e)}
17
+ new_aln.to_s.should == check_aln.to_s
18
+ end
11
19
 
20
+ Then /^it should also be able to delete islands$/ do |string|
21
+ check_aln = Alignment.new(string.split(/\n/))
22
+ new_aln = @marked_aln.update_each_element { |e| (e.state.masked? ? Element.new("-"):e)}
23
+ new_aln.to_s.should == check_aln.to_s
24
+ end
12
25
 
@@ -1,42 +1,53 @@
1
+ @dev
1
2
  Feature: Alignment editing with the Island rule
2
- The idea is to drop hypervariable floating sequences, as they are probably
3
- misaligned.
3
+ The idea is to drop hypervariable 'floating' sequences, or islands, as
4
+ they are possibly/probably misaligned.
4
5
 
5
- Drop all 'islands' in a sequence with low island consensus, that show a gap
6
- larger than 'max_gap_size' (default 6) on both sides, and are shorter than
7
- 'min_island_size' (default 30). The latter may be a large size, as an island
8
- needs to loop in and out several times to be (arguably) functional. We also
9
- add a parameter 'max_gap_size_inside' (default 2) which allows for small gaps
10
- inside the island - though the total island size is calculated including
11
- those small gaps.
6
+ Drop all 'islands' in a sequence with low consensus, that show a gap larger
7
+ than 'min_gap_size' (default 3) on both sides, and are shorter than
8
+ 'max_island_size' (default 30). An island larger than 30 elements is arguably
9
+ no longer an island, and low consensus stretches may be loops - it is up to
10
+ the alignment procedure to get that right. We also allow for micro deletions
11
+ inside an island (gaps of 1 or 2 elements).
12
12
 
13
- The island consensus is calculated by column.
14
- 'max_island_elements_unique_percentage' (default 10%) of elements in the
15
- island should have a 'min_island_column_matched' (default 1) somewhere in the
16
- element's column.
13
+ The island consensus is calculated by column. If more than 50% of the island
14
+ shows consensus, the island is retained. Consensus for each element is
15
+ defined here as the number of matches in the column (default 1).
17
16
 
18
17
  Scenario: Apply island rule to an amino acid alignment
19
- Given I have an alignment
18
+ Given I have an alignment with islands
20
19
  """
21
20
  ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
22
21
  SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
23
22
  SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
24
23
  ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
25
24
  ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
26
- ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
25
+ ----------PTIIFSGCSKACSGK-----FRSFRSFRSFRS
27
26
  ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
28
27
  ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
29
28
  -------------IFHAVR-TC-HP-----------------
30
29
  """
31
30
  When I apply island rule with max_gap_size 4
32
- Then it should have removed 2 islands
31
+ Then it should have masked islands
33
32
  """
34
33
  ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
35
34
  SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
36
35
  SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
37
36
  ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
38
37
  ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
39
- ----------PTIIFSGCSKACSGK-----VCGFRSFMLSAV
38
+ ----------PTIIFSGCSKACSGK-----XXXXXXXXXXXX
39
+ ----------PTIIFSGCSKACSGK-----XXXXXXXXXXXX
40
+ ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
41
+ -------------XXXXXX-XX-XX-----------------
42
+ """
43
+ Then it should also be able to delete islands
44
+ """
45
+ ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
46
+ SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
47
+ SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
48
+ ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
49
+ ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
50
+ ----------PTIIFSGCSKACSGK-----------------
40
51
  ----------PTIIFSGCSKACSGK-----------------
41
52
  ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
42
53
  ------------------------------------------
@@ -1,16 +1,18 @@
1
1
  require 'bio-alignment'
2
+ require 'bio-alignment/edit/mask_serial_mutations'
2
3
 
3
4
  Given /^I have an alignment$/ do |string|
4
5
  @aln = Alignment.new(string.split(/\n/))
5
- p @aln
6
6
  end
7
7
 
8
- When /^I apply rule masking with X and max_gap_size (\d+)$/ do |arg1|
9
- pending # express the regexp above with the code you wish you had
8
+ When /^I apply rule masking with X$/ do
9
+ @aln.extend MaskSerialMutations
10
+ @marked_aln = @aln.mark_serial_mutations
10
11
  end
11
12
 
12
- Then /^it should have removed (\d+) islands$/ do |arg1, string|
13
- pending # express the regexp above with the code you wish you had
13
+ Then /^mask serial mutations should result in$/ do |string|
14
+ check_aln = Alignment.new(string.split(/\n/))
15
+ new_aln = @marked_aln.update_each_element { |e| (e.state.masked ? Element.new("X"):e)}
16
+ new_aln.to_s.should == check_aln.to_s
14
17
  end
15
18
 
16
-
@@ -2,34 +2,34 @@ Feature: Alignment editing masking serial mutations
2
2
  Edit an alignment removing or masking unique elements column-wise.
3
3
 
4
4
  If a sequence has a unique AA in a column it is a single mutation event. If
5
- multiple neighbouring AA's are also unique we suspect the sequence is an
6
- outlier. This rule masks, or deletes, stretches of unique AAs. The stretch of
7
- unique AA's is defined in 'max_serial_unique' (default 5, so two bordering
8
- unique AA's are allowed).
5
+ multiple neighbouring AA's are also unique we suspect the (partial) sequence
6
+ may be an outlier. This rule masks, or deletes, stretches of totally unique
7
+ AAs. The stretch of unique AA's is defined in 'max_serial_unique' (default 5,
8
+ so two bordering unique AA's are allowed). Gaps within a series are allowed.
9
9
 
10
10
  Scenario: Apply rule to an amino acid alignment
11
11
  Given I have an alignment
12
12
  """
13
13
  ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
14
14
  SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
15
- SSIISNSFSRPTIIFSGCSTACSQQKLTSEQVCFR---LSDV
15
+ SSIISNSFSRPTIIFSGCSTACSQQKKTSEQVCFR---LSDV
16
16
  ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
17
17
  ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
18
18
  ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
19
19
  ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
20
20
  ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
21
- -------------IFHAVR-TC-HP-----------------
21
+ -------------TTTTTT-TT-HP-----------------
22
22
  """
23
- When I apply rule masking with X and max_gap_size 5
24
- Then it should result in
23
+ When I apply rule masking with X
24
+ Then mask serial mutations should result in
25
25
  """
26
26
  ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
27
27
  SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
28
- SSIISNSFSRPTIIFSGCSTACXXXXXXXXXXXFR---LSDV
28
+ SSIISNSFSRPTIIFSGCSTACSXXXXXXXXXXFR---LSDV
29
29
  ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
30
30
  ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
31
- ----------PTIIFSGCSKACSGK-----VCGFRSFMLSAV
32
- ----------PTIIFSGCSKACSGK-----------------
31
+ ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
32
+ ----------PTIIFSGCSKACSGK-----VCGXXXXXXSXX
33
33
  ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
34
34
  -------------XXXXXX-XX-XX-----------------
35
35
  """
@@ -0,0 +1,66 @@
1
+ require 'bio' # for the Newick tree parser
2
+ require 'bio-alignment'
3
+
4
+ Given /^I have a multiple sequence alignment \(MSA\)$/ do |string|
5
+ list = string.split(/\n/)
6
+ seqs = list.map { | line | line.split(' ')[1] }
7
+ ids = list.map { | line | line.split(' ')[0] }
8
+ @aln = Alignment.new(seqs, ids)
9
+ print @aln
10
+ end
11
+
12
+ Given /^I have a phylogenetic tree in Newick format$/ do |string|
13
+ @tree = Bio::Newick.new(string).tree
14
+ tree = @tree
15
+ tree.children(tree.root).size.should == 2
16
+ tree.descendents(tree.root).size.should == 14
17
+ tree.leaves.size.should == 8
18
+ leaf = tree.get_node_by_name('seq8')
19
+ leaf.name.should == "seq8"
20
+ tree.ancestors(leaf).size.should == 5
21
+ tree.get_edge(leaf, tree.parent(leaf)).distance.should == 1.1904755
22
+ tree.get_edge(tree.parent(leaf), tree.parent(tree.parent(leaf))).distance.should == 1.7857151
23
+ end
24
+
25
+ Then /^I should be able to traverse the tree$/ do
26
+ tree = @aln.attach_tree(@tree)
27
+ root = @aln.root # get the root of the tree
28
+ root.leaf?.should == false
29
+ children = root.children
30
+ children.map { |n| n.name }.sort.should == ["","seq7"]
31
+ seq7 = children.last
32
+ seq7.name.should == 'seq7'
33
+ seq7.leaf?.should == true
34
+ seq7.parent.should == root
35
+ seq4 = tree.find("seq4")
36
+ seq4.leaf?.should == true
37
+ seq4.distance(seq7).should == 19.387756600000003 # that is nice!
38
+ end
39
+
40
+ Then /^fetch elements from the MSA from each end node in the tree$/ do
41
+ # walk the tree
42
+ tree = @aln.attach_tree(@tree)
43
+ ids = []
44
+ column20 = tree.map { | leaf |
45
+ ids << leaf.name
46
+ seq = @aln.find(leaf.name)
47
+ # p seq
48
+ seq[19]
49
+ }
50
+ ids.should == ["seq6", "seq4", "seq8", "seq5", "seq3", "seq2", "seq1", "seq7"]
51
+ column20.should == ["K", "T", "K", "K", "T", "T", "T", "K"]
52
+ end
53
+
54
+ Then /^calculate the phylogenetic distance between each element$/ do
55
+ pending # express the regexp above with the code you wish you had
56
+ end
57
+
58
+ Then /^draw the MSA with the tree$/ do | string |
59
+ # textual drawing, like tabtree, or http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/149701
60
+ print string
61
+ pending # express the regexp above with the code you wish you had
62
+ end
63
+
64
+ Then /^draw MSA with the short tree$/ do |string|
65
+ pending # express the regexp above with the code you wish you had
66
+ end
@@ -0,0 +1,45 @@
1
+ @tree
2
+ Feature: Tree support for alignments
3
+ Alignments are often accompanied by phylogenetic trees.
4
+
5
+ Scenario: Get ordered elements from a tree
6
+ Given I have a multiple sequence alignment (MSA)
7
+ """
8
+ seq1 ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
9
+ seq2 SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
10
+ seq3 SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
11
+ seq4 ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
12
+ seq5 ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
13
+ seq6 ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
14
+ seq7 ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
15
+ seq8 ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
16
+ """
17
+ And I have a phylogenetic tree in Newick format
18
+ """
19
+ ((seq6:5.3571434,(seq4:4.04762,((seq8:1.1904755,seq5:1.1904755):1.7857151,((seq3:0.0,seq2:0.0):1.1904755,seq1:1.1904755):1.7857151):1.0714293):1.3095236):4.336735,seq7:9.693878);
20
+ """
21
+ Then I should be able to traverse the tree
22
+ And fetch elements from the MSA from each end node in the tree
23
+ And calculate the phylogenetic distance between each element
24
+ And draw the MSA with the tree
25
+ """
26
+ ,--9.69----------------------------------------- seq7 ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
27
+ | ,--1.19----- seq1 ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
28
+ | ,--1.79--| ,-- seq2 SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
29
+ | ,--1.07--| `--1.19--+-- seq3 SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
30
+ | | `--1.79--+--1.19----- seq5 ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
31
+ | ,--1.31--| `--1.19----- seq8 ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
32
+ `--4.34--| `--4.05----------------------- seq4 ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
33
+ `--5.36-------------------------------- seq6 ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
34
+ """
35
+ Then draw MSA with the short tree
36
+ """
37
+ ,----------------- seq7 ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
38
+ | ,----- seq1 ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
39
+ | ,--| ,-- seq2 SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
40
+ | ,--| `--+-- seq3 SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
41
+ | | `--+----- seq5 ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
42
+ | ,--| `----- seq8 ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
43
+ `--| `----------- seq4 ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
44
+ `-------------- seq6 ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
45
+ """
@@ -2,6 +2,9 @@
2
2
  # bioruby directory tree.
3
3
  #
4
4
 
5
- require 'bio-alignment/alignment'
5
+ require 'bio-alignment/state'
6
+ require 'bio-alignment/elements'
6
7
  require 'bio-alignment/sequence'
7
8
  require 'bio-alignment/codonsequence'
9
+ require 'bio-alignment/tree'
10
+ require 'bio-alignment/alignment'
@@ -1,7 +1,8 @@
1
1
  # Alignment
2
2
 
3
3
  require 'bio-alignment/pal2nal'
4
- require 'bio-alignment/column'
4
+ require 'bio-alignment/columns'
5
+ require 'bio-alignment/rows'
5
6
 
6
7
  module Bio
7
8
 
@@ -10,6 +11,7 @@ module Bio
10
11
  class Alignment
11
12
  include Enumerable
12
13
  include Pal2Nal
14
+ include Rows
13
15
  include Columns
14
16
 
15
17
  attr_accessor :sequences
@@ -17,16 +19,22 @@ module Bio
17
19
  # Create alignment. seqs can be a list of sequences. If these
18
20
  # are String types, they get converted to the library Sequence
19
21
  # container
20
- def initialize seqs = nil
22
+ def initialize seqs = nil, ids = nil
21
23
  @sequences = []
22
24
  if seqs
25
+ num = 0
23
26
  seqs.each_with_index do | seq, i |
27
+ next if seq == nil or seq.to_s.strip == ""
28
+ id = num
29
+ id = ids[i] if ids and ids[i]
24
30
  @sequences <<
25
31
  if seq.kind_of?(String)
26
- Sequence.new(i,seq)
32
+ seq1 = Sequence.new(id,seq.strip)
33
+ seq1
27
34
  else
28
35
  seq
29
36
  end
37
+ num += 1
30
38
  end
31
39
  end
32
40
  end
@@ -39,8 +47,55 @@ module Bio
39
47
 
40
48
  def each
41
49
  rows.each { | seq | yield seq }
50
+ self
42
51
  end
43
52
 
53
+ def each_element
54
+ each { |seq| seq.each { |e| yield e }}
55
+ self
56
+ end
57
+
58
+ def find name
59
+ each do | seq |
60
+ return seq if seq.id == name
61
+ end
62
+ raise "ERROR: Sequence not found by its name #{name}"
63
+ end
64
+
65
+ # Clone the alignment and replace each element with the block's result
66
+ def update_each_element
67
+ aln = self.clone
68
+ aln.each { |seq| seq.each_with_index { |e,i| seq.seq[i] = yield e }}
69
+ end
70
+
71
+ def to_s
72
+ res = ""
73
+ res += "\t" + columns_to_s + "\n" if @columns
74
+ res += map{ |seq| seq.id.to_s + "\t" + seq.to_s }.join("\n")
75
+ res
76
+ end
77
+
78
+ # Return a deep cloned alignment. This method clones sequences,
79
+ # and the state objects
80
+ def clone
81
+ aln = super
82
+ # clone the sequences
83
+ aln.sequences = []
84
+ each do | seq |
85
+ aln.sequences << seq.clone
86
+ end
87
+ aln.clone_columns! if @columns
88
+ aln
89
+ end
90
+
91
+ # extend BioAlignment with Tree functionality - this method adds
92
+ # a tree and pulls in the functionality of the Tree module. Returns
93
+ # the tree traverser
94
+ def attach_tree tree
95
+ extend Tree
96
+ @tree = Tree::init(tree)
97
+ @tree
98
+ end
44
99
  end
45
100
  end
46
101
  end
@@ -6,6 +6,9 @@ module Bio
6
6
 
7
7
  # Codon element for the matrix, used by CodonSequence.
8
8
  class Codon
9
+ GAP = '---'
10
+ UNDEFINED = 'X'
11
+
9
12
  attr_reader :codon_table
10
13
 
11
14
  def initialize codon, codon_table = 1
@@ -14,7 +17,7 @@ module Bio
14
17
  end
15
18
 
16
19
  def gap?
17
- @codon == '---'
20
+ @codon == GAP
18
21
  end
19
22
 
20
23
  def undefined?
@@ -36,7 +39,7 @@ module Bio
36
39
  if gap?
37
40
  return '-'
38
41
  elsif undefined?
39
- return 'X'
42
+ return UNDEFINED
40
43
  else
41
44
  raise 'What?'
42
45
  end
@@ -46,6 +49,7 @@ module Bio
46
49
 
47
50
  private
48
51
 
52
+ # lazy translation of codon to amino acid
49
53
  def translate
50
54
  @aa ||= Bio::CodonTable[@codon_table][@codon]
51
55
  @aa
@@ -95,6 +99,14 @@ module Bio
95
99
  def to_aa
96
100
  @seq.map { |codon| codon.to_aa }.join('')
97
101
  end
102
+
103
+ def empty_copy
104
+ CodonSequence.new(@id,"")
105
+ end
106
+
107
+ def << codon
108
+ @seq << codon
109
+ end
98
110
 
99
111
  end
100
112