bio-alignment 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/Gemfile +5 -4
  2. data/README.md +94 -9
  3. data/Rakefile +2 -1
  4. data/VERSION +1 -1
  5. data/doc/bio-alignment-design.md +75 -11
  6. data/features/bioruby-feature.rb +17 -0
  7. data/features/bioruby.feature +6 -1
  8. data/features/columns-feature.rb +2 -0
  9. data/features/edit/del_bridges-feature.rb +7 -3
  10. data/features/edit/del_bridges.feature +1 -2
  11. data/features/edit/del_non_informative_sequences-feature.rb +26 -0
  12. data/features/edit/del_non_informative_sequences.feature +19 -0
  13. data/features/edit/del_short_sequences-feature.rb +21 -0
  14. data/features/edit/del_short_sequences.feature +25 -0
  15. data/features/edit/gblocks-feature.rb +2 -2
  16. data/features/edit/mask_islands-feature.rb +17 -4
  17. data/features/edit/mask_islands.feature +28 -17
  18. data/features/edit/mask_serial_mutations-feature.rb +8 -6
  19. data/features/edit/mask_serial_mutations.feature +11 -11
  20. data/features/tree-feature.rb +66 -0
  21. data/features/tree.feature +45 -0
  22. data/lib/bio-alignment.rb +4 -1
  23. data/lib/bio-alignment/alignment.rb +58 -3
  24. data/lib/bio-alignment/codonsequence.rb +14 -2
  25. data/lib/bio-alignment/columns.rb +102 -0
  26. data/lib/bio-alignment/edit/del_bridges.rb +18 -1
  27. data/lib/bio-alignment/edit/del_non_informative_sequences.rb +27 -0
  28. data/lib/bio-alignment/edit/del_short_sequences.rb +28 -0
  29. data/lib/bio-alignment/edit/edit_columns.rb +22 -0
  30. data/lib/bio-alignment/edit/edit_rows.rb +49 -0
  31. data/lib/bio-alignment/edit/mask_islands.rb +115 -0
  32. data/lib/bio-alignment/edit/mask_serial_mutations.rb +44 -0
  33. data/lib/bio-alignment/elements.rb +86 -0
  34. data/lib/bio-alignment/rows.rb +52 -0
  35. data/lib/bio-alignment/sequence.rb +20 -14
  36. data/lib/bio-alignment/state.rb +64 -8
  37. data/lib/bio-alignment/tree.rb +77 -0
  38. data/spec/bio-alignment_spec.rb +57 -1
  39. data/spec/spec_helper.rb +3 -3
  40. metadata +47 -22
  41. data/lib/bio-alignment/column.rb +0 -47
@@ -0,0 +1,52 @@
1
+ require 'bio-alignment/state'
2
+
3
+ module Bio
4
+
5
+ module BioAlignment
6
+
7
+ # The Rows module provides accessors for the Row list
8
+ # returning Row objects
9
+ module Rows
10
+
11
+ # Return a copy of the alignment containing only the matching rows. The originating
12
+ # sequences should have methods 'empty_copy' and '<<'
13
+ def rows_where &block
14
+ seqs = []
15
+ rows.each do | seq |
16
+ seqs << seq.clone if block.call(seq)
17
+ end
18
+ Alignment.new(seqs)
19
+ end
20
+
21
+ end
22
+
23
+ # Support the notion of Rows in an alignment. A Row
24
+ # can have state by attaching state objects
25
+ class Row
26
+ include State
27
+
28
+ def initialize aln, row
29
+ @aln = aln
30
+ @row = row
31
+ end
32
+
33
+ def count &block
34
+ counter = 0
35
+ each do | e |
36
+ found =
37
+ if e.kind_of?(String)
38
+ block.call(Element.new(e))
39
+ else
40
+ block.call(e)
41
+ end
42
+ counter += 1 if found
43
+ end
44
+ counter
45
+ end
46
+
47
+ end
48
+
49
+ end
50
+
51
+ end
52
+
@@ -1,20 +1,8 @@
1
1
  module Bio
2
2
  module BioAlignment
3
3
 
4
- # Simple element that can be queried
5
- class Element
6
- def initialize c
7
- @c = c
8
- end
9
- def gap?
10
- @c == '-'
11
- end
12
- def to_s
13
- @c
14
- end
15
- end
16
-
17
- # A Sequence is a simple container for String sequences/lists
4
+ # A Sequence is a simple and efficient container for String sequences. To
5
+ # add state to elements unpack it into an Elements object with to_elements.
18
6
  #
19
7
  class Sequence
20
8
  include Enumerable
@@ -34,6 +22,8 @@ module Bio
34
22
  @seq.length
35
23
  end
36
24
 
25
+ # Return each element in the Sequence as an Element object, so it
26
+ # can be queried for gap? and undefined?
37
27
  def each
38
28
  @seq.each_char { | c | yield Element.new(c) }
39
29
  end
@@ -42,6 +32,22 @@ module Bio
42
32
  @seq.to_s
43
33
  end
44
34
 
35
+ def << element
36
+ @seq += element.to_s
37
+ end
38
+
39
+ def empty_copy
40
+ Sequence.new(@id,"")
41
+ end
42
+
43
+ def clone
44
+ Sequence.new(@id,@seq.clone)
45
+ end
46
+
47
+ # Return Sequence (string) as an Elements object
48
+ def to_elements
49
+ Elements.new(@id,@seq)
50
+ end
45
51
  end
46
52
  end
47
53
  end
@@ -6,24 +6,80 @@ module Bio
6
6
  attr_accessor :state
7
7
  end
8
8
 
9
- # Convenience class for tracking state. Note you can add
10
- # any class you like
11
- class ColumnState
9
+ module DeleteState
12
10
  attr_accessor :deleted
13
11
 
12
+ def delete!
13
+ @deleted = true
14
+ end
15
+
14
16
  def deleted?
15
- deleted == true
17
+ @deleted == true
18
+ end
19
+
20
+ def to_s
21
+ (deleted? ? 'X' : '.')
22
+ end
23
+ end
24
+
25
+ module MarkState
26
+ attr_accessor :marked
27
+
28
+ def mark!
29
+ @marked = true
30
+ end
31
+
32
+ def unmark!
33
+ @marked = false
34
+ end
35
+
36
+ def marked?
37
+ @marked == true
38
+ end
39
+
40
+ def to_s
41
+ (marked? ? 'X' : '.')
42
+ end
43
+ end
44
+
45
+ module MaskState
46
+ attr_accessor :masked
47
+
48
+ def mask!
49
+ @masked = true
50
+ end
51
+
52
+ def unmask!
53
+ @masked = false
54
+ end
55
+
56
+ def masked?
57
+ @masked == true
58
+ end
59
+
60
+ def to_s
61
+ (masked? ? 'X' : '.')
16
62
  end
17
63
  end
18
64
 
65
+ # Convenience class for tracking state. Note you can add
66
+ # any class you like
67
+ class ColumnState
68
+ include DeleteState
69
+ end
70
+
19
71
  # Convenience class for tracking state. Note you can add
20
72
  # any class you like
21
73
  class RowState
22
- attr_accessor :deleted
74
+ include DeleteState
75
+ end
23
76
 
24
- def deleted?
25
- deleted == true
26
- end
77
+ class ElementState
78
+ include MarkState
79
+ end
80
+
81
+ class ElementMaskedState
82
+ include MaskState
27
83
  end
28
84
 
29
85
  end
@@ -0,0 +1,77 @@
1
+ module Bio
2
+
3
+ module BioAlignment
4
+
5
+ # The Tree module turns a tree into a traversable object, by wrapping
6
+ # BioRuby's basic tree objects. The Bio::Tree object can always be
7
+ # fetched using to_bioruby_tree.
8
+
9
+ module Tree
10
+
11
+ class Node
12
+ end
13
+
14
+ # Make all nodes in the Bio::Tree aware of the tree object so we can use
15
+ # its methods
16
+ def Tree::init tree
17
+ if tree.kind_of?(Bio::Tree)
18
+ # walk all nodes and inject the tree reference
19
+ tree.each_node do | node |
20
+ node.inject_tree(tree)
21
+ end
22
+ # tree.root.set_tree(tree)
23
+ else
24
+ raise "BioAlignment::Tree does not understand tree type "+tree.class.to_s
25
+ end
26
+ return tree
27
+ end
28
+
29
+ def root
30
+ @tree.root
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+
37
+ # Here we add to BioRuby's Bio::Tree classes
38
+ class Tree
39
+ class Node
40
+ def inject_tree tree
41
+ @tree = tree
42
+ end
43
+
44
+ def leaf?
45
+ children.size == 0
46
+ end
47
+
48
+ def children
49
+ @tree.children(self)
50
+ end
51
+
52
+ def parent
53
+ @tree.parent(self)
54
+ end
55
+
56
+ # Get the distance to another node (FIXME: write test)
57
+ def distance other
58
+ @tree.distance(self,other)
59
+ end
60
+ end
61
+
62
+ def find name
63
+ get_node_by_name(name)
64
+ end
65
+
66
+ # Walk the ordered tree leaves, calling into the block, and return an array
67
+ def map
68
+ res = []
69
+ leaves.each do | leaf |
70
+ item = yield leaf
71
+ res << item
72
+ end
73
+ res
74
+ end
75
+
76
+ end
77
+ end
@@ -1,6 +1,7 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- require 'bigbio'
3
+ require 'bio-alignment'
4
+ require 'bigbio' # for the FastaReader
4
5
  include Bio::BioAlignment # Namespace
5
6
 
6
7
  describe "BioAlignment::CodonSequence" do
@@ -49,6 +50,61 @@ describe "BioAlignment::Alignment" do
49
50
  aln.sequences.first.seq[0].to_aa.should == "M"
50
51
  aln.sequences.first.seq[2].to_aa.should == "T"
51
52
  end
53
+ end
54
+
52
55
 
56
+ describe "BioAlignment::DelBridges" do
57
+ require 'bio-alignment/edit/del_bridges'
58
+ string =
59
+ """
60
+ ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
61
+ SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
62
+ SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
63
+ ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
64
+ ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
65
+ ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
66
+ ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
67
+ ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
68
+ -------------IFHAVR-TC-HP-----------------
69
+ """
70
+ aln = Alignment.new(string.split(/\n/))
71
+ print aln.to_s,"\n"
72
+ columns = aln.columns
73
+ columns.should_not == nil
74
+ columns.should_not == []
75
+ columns.size.should == 42
76
+ # make sure we are using the same columns
77
+ aln.columns.should == columns
78
+ aln.extend DelBridges
79
+ aln2 = aln.mark_bridges
80
+ print aln2.to_s,"\n"
81
+ columns2 = aln2.columns
82
+ columns2.should_not == nil
83
+ columns2.should_not == []
84
+ columns2.count { |col| col.state.deleted? }.should == 6
85
+ aln2.columns[0].state.should == columns2[0].state
86
+ aln2.columns[0].state.should_not == columns[0].state
87
+ aln2.rows.first.to_s.should == "----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV"
88
+ # now write out the alignment with deleted columns removed
89
+ aln3 = aln2.columns_where { |col| !col.state.deleted? }
90
+ print aln3.to_s,"\n"
91
+ aln3.rows.first.to_s.should == "SNSFSRPTIIFSGCSTACSGKSELVCGFRSFMLSDV"
53
92
  end
54
93
 
94
+ describe "BioAlignment::DelBridges for codons" do
95
+ # We are going to do the same for a codon alignment
96
+ aln = Alignment.new
97
+ fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
98
+ fasta.each do | rec |
99
+ aln.sequences << CodonSequence.new(rec.id, rec.seq)
100
+ end
101
+ aln.extend DelBridges
102
+ aln2 = aln.mark_bridges
103
+ # print aln2[0].to_s,"\n"
104
+ aln2.columns.size.should == 404
105
+ # count deleted columns
106
+ aln2.columns.count { |col| col.state.deleted? }.should == 5
107
+ # create new alignment
108
+ aln3 = aln2.columns_where { |col| !col.state.deleted? }
109
+ aln3.columns.size.should == 399
110
+ end
@@ -7,6 +7,6 @@ require 'bio-alignment'
7
7
  # in ./support/ and its subdirectories.
8
8
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
9
 
10
- RSpec.configure do |config|
11
-
12
- end
10
+ # RSpec.configure do |config|
11
+ #
12
+ # end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-28 00:00:00.000000000Z
12
+ date: 2012-03-17 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio-logger
16
- requirement: &11633860 !ruby/object:Gem::Requirement
16
+ requirement: &26202820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *11633860
24
+ version_requirements: *26202820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bio
27
- requirement: &11632600 !ruby/object:Gem::Requirement
27
+ requirement: &26201340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,21 @@ dependencies:
32
32
  version: 1.4.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *11632600
35
+ version_requirements: *26201340
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &26199400 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *26199400
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: bio-bigbio
38
- requirement: &11622800 !ruby/object:Gem::Requirement
49
+ requirement: &26197880 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ! '>'
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 0.1.3
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *11622800
57
+ version_requirements: *26197880
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: cucumber
49
- requirement: &11622000 !ruby/object:Gem::Requirement
60
+ requirement: &26196760 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '0'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *11622000
68
+ version_requirements: *26196760
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: rspec
60
- requirement: &11621400 !ruby/object:Gem::Requirement
71
+ requirement: &26195120 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,29 +76,29 @@ dependencies:
65
76
  version: 2.3.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *11621400
79
+ version_requirements: *26195120
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: bundler
71
- requirement: &11620480 !ruby/object:Gem::Requirement
82
+ requirement: &26194620 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
- - - ~>
85
+ - - ! '>='
75
86
  - !ruby/object:Gem::Version
76
- version: 1.0.0
87
+ version: 1.0.21
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *11620480
90
+ version_requirements: *26194620
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: jeweler
82
- requirement: &11619680 !ruby/object:Gem::Requirement
93
+ requirement: &26193920 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
- - - ~>
96
+ - - ! '>='
86
97
  - !ruby/object:Gem::Version
87
- version: 1.7.0
98
+ version: '0'
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *11619680
101
+ version_requirements: *26193920
91
102
  description: Alignment handler for multiple sequence alignments (MSA)
92
103
  email: pjotr.public01@thebird.nl
93
104
  executables:
@@ -115,7 +126,10 @@ files:
115
126
  - features/columns.feature
116
127
  - features/edit/del_bridges-feature.rb
117
128
  - features/edit/del_bridges.feature
129
+ - features/edit/del_non_informative_sequences-feature.rb
118
130
  - features/edit/del_non_informative_sequences.feature
131
+ - features/edit/del_short_sequences-feature.rb
132
+ - features/edit/del_short_sequences.feature
119
133
  - features/edit/gblocks-feature.rb
120
134
  - features/edit/gblocks.feature
121
135
  - features/edit/mask_islands-feature.rb
@@ -126,15 +140,26 @@ files:
126
140
  - features/pal2nal.feature
127
141
  - features/rows-feature.rb
128
142
  - features/rows.feature
143
+ - features/tree-feature.rb
144
+ - features/tree.feature
129
145
  - lib/bio-alignment.rb
130
146
  - lib/bio-alignment/alignment.rb
131
147
  - lib/bio-alignment/bioruby.rb
132
148
  - lib/bio-alignment/codonsequence.rb
133
- - lib/bio-alignment/column.rb
149
+ - lib/bio-alignment/columns.rb
134
150
  - lib/bio-alignment/edit/del_bridges.rb
151
+ - lib/bio-alignment/edit/del_non_informative_sequences.rb
152
+ - lib/bio-alignment/edit/del_short_sequences.rb
153
+ - lib/bio-alignment/edit/edit_columns.rb
154
+ - lib/bio-alignment/edit/edit_rows.rb
155
+ - lib/bio-alignment/edit/mask_islands.rb
156
+ - lib/bio-alignment/edit/mask_serial_mutations.rb
157
+ - lib/bio-alignment/elements.rb
135
158
  - lib/bio-alignment/pal2nal.rb
159
+ - lib/bio-alignment/rows.rb
136
160
  - lib/bio-alignment/sequence.rb
137
161
  - lib/bio-alignment/state.rb
162
+ - lib/bio-alignment/tree.rb
138
163
  - spec/bio-alignment_spec.rb
139
164
  - spec/spec_helper.rb
140
165
  - test/data/fasta/codon/aa-alignment.fa
@@ -158,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
158
183
  version: '0'
159
184
  segments:
160
185
  - 0
161
- hash: 1565072942973090495
186
+ hash: 1800672102634743595
162
187
  required_rubygems_version: !ruby/object:Gem::Requirement
163
188
  none: false
164
189
  requirements: