bio-alignment 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/Gemfile +5 -4
  2. data/README.md +94 -9
  3. data/Rakefile +2 -1
  4. data/VERSION +1 -1
  5. data/doc/bio-alignment-design.md +75 -11
  6. data/features/bioruby-feature.rb +17 -0
  7. data/features/bioruby.feature +6 -1
  8. data/features/columns-feature.rb +2 -0
  9. data/features/edit/del_bridges-feature.rb +7 -3
  10. data/features/edit/del_bridges.feature +1 -2
  11. data/features/edit/del_non_informative_sequences-feature.rb +26 -0
  12. data/features/edit/del_non_informative_sequences.feature +19 -0
  13. data/features/edit/del_short_sequences-feature.rb +21 -0
  14. data/features/edit/del_short_sequences.feature +25 -0
  15. data/features/edit/gblocks-feature.rb +2 -2
  16. data/features/edit/mask_islands-feature.rb +17 -4
  17. data/features/edit/mask_islands.feature +28 -17
  18. data/features/edit/mask_serial_mutations-feature.rb +8 -6
  19. data/features/edit/mask_serial_mutations.feature +11 -11
  20. data/features/tree-feature.rb +66 -0
  21. data/features/tree.feature +45 -0
  22. data/lib/bio-alignment.rb +4 -1
  23. data/lib/bio-alignment/alignment.rb +58 -3
  24. data/lib/bio-alignment/codonsequence.rb +14 -2
  25. data/lib/bio-alignment/columns.rb +102 -0
  26. data/lib/bio-alignment/edit/del_bridges.rb +18 -1
  27. data/lib/bio-alignment/edit/del_non_informative_sequences.rb +27 -0
  28. data/lib/bio-alignment/edit/del_short_sequences.rb +28 -0
  29. data/lib/bio-alignment/edit/edit_columns.rb +22 -0
  30. data/lib/bio-alignment/edit/edit_rows.rb +49 -0
  31. data/lib/bio-alignment/edit/mask_islands.rb +115 -0
  32. data/lib/bio-alignment/edit/mask_serial_mutations.rb +44 -0
  33. data/lib/bio-alignment/elements.rb +86 -0
  34. data/lib/bio-alignment/rows.rb +52 -0
  35. data/lib/bio-alignment/sequence.rb +20 -14
  36. data/lib/bio-alignment/state.rb +64 -8
  37. data/lib/bio-alignment/tree.rb +77 -0
  38. data/spec/bio-alignment_spec.rb +57 -1
  39. data/spec/spec_helper.rb +3 -3
  40. metadata +47 -22
  41. data/lib/bio-alignment/column.rb +0 -47
@@ -0,0 +1,52 @@
1
+ require 'bio-alignment/state'
2
+
3
+ module Bio
4
+
5
+ module BioAlignment
6
+
7
+ # The Rows module provides accessors for the Row list
8
+ # returning Row objects
9
+ module Rows
10
+
11
+ # Return a copy of an alignment with matching rows. The originating
12
+ # sequences should have methods 'empty_copy' and '<<'
13
+ def rows_where &block
14
+ seqs = []
15
+ rows.each do | seq |
16
+ seqs << seq.clone if block.call(seq)
17
+ end
18
+ Alignment.new(seqs)
19
+ end
20
+
21
+ end
22
+
23
+ # Support the notion of Rows in an alignment. A Row
24
+ # can have state by attaching state objects
25
+ class Row
26
+ include State
27
+
28
+ def initialize aln, row
29
+ @aln = aln
30
+ @row = row
31
+ end
32
+
33
+ def count &block
34
+ counter = 0
35
+ each do | e |
36
+ found =
37
+ if e.kind_of?(String)
38
+ block.call(Element.new(e))
39
+ else
40
+ block.call(e)
41
+ end
42
+ counter += 1 if found
43
+ end
44
+ counter
45
+ end
46
+
47
+ end
48
+
49
+ end
50
+
51
+ end
52
+
@@ -1,20 +1,8 @@
1
1
  module Bio
2
2
  module BioAlignment
3
3
 
4
- # Simple element that can be queried
5
- class Element
6
- def initialize c
7
- @c = c
8
- end
9
- def gap?
10
- @c == '-'
11
- end
12
- def to_s
13
- @c
14
- end
15
- end
16
-
17
- # A Sequence is a simple container for String sequences/lists
4
+ # A Sequence is a simple and efficient container for String sequences. To
5
+ # add state to elements unpack it into an Elements object with to_elements.
18
6
  #
19
7
  class Sequence
20
8
  include Enumerable
@@ -34,6 +22,8 @@ module Bio
34
22
  @seq.length
35
23
  end
36
24
 
25
+ # Return each element in the Sequence as an Element object, so it
26
+ # can be queried for gap? and undefined?
37
27
  def each
38
28
  @seq.each_char { | c | yield Element.new(c) }
39
29
  end
@@ -42,6 +32,22 @@ module Bio
42
32
  @seq.to_s
43
33
  end
44
34
 
35
+ def << element
36
+ @seq += element.to_s
37
+ end
38
+
39
+ def empty_copy
40
+ Sequence.new(@id,"")
41
+ end
42
+
43
+ def clone
44
+ Sequence.new(@id,@seq.clone)
45
+ end
46
+
47
+ # Return Sequence (string) as an Elements object
48
+ def to_elements
49
+ Elements.new(@id,@seq)
50
+ end
45
51
  end
46
52
  end
47
53
  end
@@ -6,24 +6,80 @@ module Bio
6
6
  attr_accessor :state
7
7
  end
8
8
 
9
- # Convenience class for tracking state. Note you can add
10
- # any class you like
11
- class ColumnState
9
+ module DeleteState
12
10
  attr_accessor :deleted
13
11
 
12
+ def delete!
13
+ @deleted = true
14
+ end
15
+
14
16
  def deleted?
15
- deleted == true
17
+ @deleted == true
18
+ end
19
+
20
+ def to_s
21
+ (deleted? ? 'X' : '.')
22
+ end
23
+ end
24
+
25
+ module MarkState
26
+ attr_accessor :marked
27
+
28
+ def mark!
29
+ @marked = true
30
+ end
31
+
32
+ def unmark!
33
+ @marked = false
34
+ end
35
+
36
+ def marked?
37
+ @marked == true
38
+ end
39
+
40
+ def to_s
41
+ (marked? ? 'X' : '.')
42
+ end
43
+ end
44
+
45
+ module MaskState
46
+ attr_accessor :masked
47
+
48
+ def mask!
49
+ @masked = true
50
+ end
51
+
52
+ def unmask!
53
+ @masked = false
54
+ end
55
+
56
+ def masked?
57
+ @masked == true
58
+ end
59
+
60
+ def to_s
61
+ (masked? ? 'X' : '.')
16
62
  end
17
63
  end
18
64
 
65
+ # Convenience class for tracking state. Note you can add
66
+ # any class you like
67
+ class ColumnState
68
+ include DeleteState
69
+ end
70
+
19
71
  # Convenience class for tracking state. Note you can add
20
72
  # any class you like
21
73
  class RowState
22
- attr_accessor :deleted
74
+ include DeleteState
75
+ end
23
76
 
24
- def deleted?
25
- deleted == true
26
- end
77
+ class ElementState
78
+ include MarkState
79
+ end
80
+
81
+ class ElementMaskedState
82
+ include MaskState
27
83
  end
28
84
 
29
85
  end
@@ -0,0 +1,77 @@
1
+ module Bio
2
+
3
+ module BioAlignment
4
+
5
+ # The Tree module turns a tree into a traversable object, by wrapping
6
+ # BioRuby's basic tree objects. The Bio::Tree object can always be
7
+ # fetched using to_bioruby_tree.
8
+
9
+ module Tree
10
+
11
+ class Node
12
+ end
13
+
14
+ # Make all nodes in the Bio::Tree aware of the tree object so we can use
15
+ # its methods
16
+ def Tree::init tree
17
+ if tree.kind_of?(Bio::Tree)
18
+ # walk all nodes and inject the tree info
19
+ tree.each_node do | node |
20
+ node.inject_tree(tree)
21
+ end
22
+ # tree.root.set_tree(tree)
23
+ else
24
+ raise "BioAlignment::Tree does not understand tree type "+tree.class.to_s
25
+ end
26
+ return tree
27
+ end
28
+
29
+ def root
30
+ @tree.root
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+
37
+ # Here we add to BioRuby's Bio::Tree classes
38
+ class Tree
39
+ class Node
40
+ def inject_tree tree
41
+ @tree = tree
42
+ end
43
+
44
+ def leaf?
45
+ children.size == 0
46
+ end
47
+
48
+ def children
49
+ @tree.children(self)
50
+ end
51
+
52
+ def parent
53
+ @tree.parent(self)
54
+ end
55
+
56
+ # Get the distance to another node (FIXME: write test)
57
+ def distance other
58
+ @tree.distance(self,other)
59
+ end
60
+ end
61
+
62
+ def find name
63
+ get_node_by_name(name)
64
+ end
65
+
66
+ # Walk the ordered tree leaves, calling into the block, and return an array
67
+ def map
68
+ res = []
69
+ leaves.each do | leaf |
70
+ item = yield leaf
71
+ res << item
72
+ end
73
+ res
74
+ end
75
+
76
+ end
77
+ end
@@ -1,6 +1,7 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- require 'bigbio'
3
+ require 'bio-alignment'
4
+ require 'bigbio' # for the FastaReader
4
5
  include Bio::BioAlignment # Namespace
5
6
 
6
7
  describe "BioAlignment::CodonSequence" do
@@ -49,6 +50,61 @@ describe "BioAlignment::Alignment" do
49
50
  aln.sequences.first.seq[0].to_aa.should == "M"
50
51
  aln.sequences.first.seq[2].to_aa.should == "T"
51
52
  end
53
+ end
54
+
52
55
 
56
+ describe "BioAlignment::DelBridges" do
57
+ require 'bio-alignment/edit/del_bridges'
58
+ string =
59
+ """
60
+ ----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
61
+ SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
62
+ SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
63
+ ----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
64
+ ----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
65
+ ----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
66
+ ----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
67
+ ----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
68
+ -------------IFHAVR-TC-HP-----------------
69
+ """
70
+ aln = Alignment.new(string.split(/\n/))
71
+ print aln.to_s,"\n"
72
+ columns = aln.columns
73
+ columns.should_not == nil
74
+ columns.should_not == []
75
+ columns.size.should == 42
76
+ # make sure we are using the same columns
77
+ aln.columns.should == columns
78
+ aln.extend DelBridges
79
+ aln2 = aln.mark_bridges
80
+ print aln2.to_s,"\n"
81
+ columns2 = aln2.columns
82
+ columns2.should_not == nil
83
+ columns2.should_not == []
84
+ columns2.count { |col| col.state.deleted? }.should == 6
85
+ aln2.columns[0].state.should == columns2[0].state
86
+ aln2.columns[0].state.should_not == columns[0].state
87
+ aln2.rows.first.to_s.should == "----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV"
88
+ # now write out the alignment with deleted columns removed
89
+ aln3 = aln2.columns_where { |col| !col.state.deleted? }
90
+ print aln3.to_s,"\n"
91
+ aln3.rows.first.to_s.should == "SNSFSRPTIIFSGCSTACSGKSELVCGFRSFMLSDV"
53
92
  end
54
93
 
94
+ describe "BioAlignment::DelBridges for codons" do
95
+ # We are going to do the same for a codon alignment
96
+ aln = Alignment.new
97
+ fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
98
+ fasta.each do | rec |
99
+ aln.sequences << CodonSequence.new(rec.id, rec.seq)
100
+ end
101
+ aln.extend DelBridges
102
+ aln2 = aln.mark_bridges
103
+ # print aln2[0].to_s,"\n"
104
+ aln2.columns.size.should == 404
105
+ # count deleted columns
106
+ aln2.columns.count { |col| col.state.deleted? }.should == 5
107
+ # create new alignment
108
+ aln3 = aln2.columns_where { |col| !col.state.deleted? }
109
+ aln3.columns.size.should == 399
110
+ end
@@ -7,6 +7,6 @@ require 'bio-alignment'
7
7
  # in ./support/ and its subdirectories.
8
8
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
9
 
10
- RSpec.configure do |config|
11
-
12
- end
10
+ # RSpec.configure do |config|
11
+ #
12
+ # end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-28 00:00:00.000000000Z
12
+ date: 2012-03-17 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio-logger
16
- requirement: &11633860 !ruby/object:Gem::Requirement
16
+ requirement: &26202820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *11633860
24
+ version_requirements: *26202820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bio
27
- requirement: &11632600 !ruby/object:Gem::Requirement
27
+ requirement: &26201340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,21 @@ dependencies:
32
32
  version: 1.4.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *11632600
35
+ version_requirements: *26201340
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &26199400 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *26199400
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: bio-bigbio
38
- requirement: &11622800 !ruby/object:Gem::Requirement
49
+ requirement: &26197880 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ! '>'
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 0.1.3
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *11622800
57
+ version_requirements: *26197880
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: cucumber
49
- requirement: &11622000 !ruby/object:Gem::Requirement
60
+ requirement: &26196760 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '0'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *11622000
68
+ version_requirements: *26196760
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: rspec
60
- requirement: &11621400 !ruby/object:Gem::Requirement
71
+ requirement: &26195120 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,29 +76,29 @@ dependencies:
65
76
  version: 2.3.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *11621400
79
+ version_requirements: *26195120
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: bundler
71
- requirement: &11620480 !ruby/object:Gem::Requirement
82
+ requirement: &26194620 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
- - - ~>
85
+ - - ! '>='
75
86
  - !ruby/object:Gem::Version
76
- version: 1.0.0
87
+ version: 1.0.21
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *11620480
90
+ version_requirements: *26194620
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: jeweler
82
- requirement: &11619680 !ruby/object:Gem::Requirement
93
+ requirement: &26193920 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
- - - ~>
96
+ - - ! '>='
86
97
  - !ruby/object:Gem::Version
87
- version: 1.7.0
98
+ version: '0'
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *11619680
101
+ version_requirements: *26193920
91
102
  description: Alignment handler for multiple sequence alignments (MSA)
92
103
  email: pjotr.public01@thebird.nl
93
104
  executables:
@@ -115,7 +126,10 @@ files:
115
126
  - features/columns.feature
116
127
  - features/edit/del_bridges-feature.rb
117
128
  - features/edit/del_bridges.feature
129
+ - features/edit/del_non_informative_sequences-feature.rb
118
130
  - features/edit/del_non_informative_sequences.feature
131
+ - features/edit/del_short_sequences-feature.rb
132
+ - features/edit/del_short_sequences.feature
119
133
  - features/edit/gblocks-feature.rb
120
134
  - features/edit/gblocks.feature
121
135
  - features/edit/mask_islands-feature.rb
@@ -126,15 +140,26 @@ files:
126
140
  - features/pal2nal.feature
127
141
  - features/rows-feature.rb
128
142
  - features/rows.feature
143
+ - features/tree-feature.rb
144
+ - features/tree.feature
129
145
  - lib/bio-alignment.rb
130
146
  - lib/bio-alignment/alignment.rb
131
147
  - lib/bio-alignment/bioruby.rb
132
148
  - lib/bio-alignment/codonsequence.rb
133
- - lib/bio-alignment/column.rb
149
+ - lib/bio-alignment/columns.rb
134
150
  - lib/bio-alignment/edit/del_bridges.rb
151
+ - lib/bio-alignment/edit/del_non_informative_sequences.rb
152
+ - lib/bio-alignment/edit/del_short_sequences.rb
153
+ - lib/bio-alignment/edit/edit_columns.rb
154
+ - lib/bio-alignment/edit/edit_rows.rb
155
+ - lib/bio-alignment/edit/mask_islands.rb
156
+ - lib/bio-alignment/edit/mask_serial_mutations.rb
157
+ - lib/bio-alignment/elements.rb
135
158
  - lib/bio-alignment/pal2nal.rb
159
+ - lib/bio-alignment/rows.rb
136
160
  - lib/bio-alignment/sequence.rb
137
161
  - lib/bio-alignment/state.rb
162
+ - lib/bio-alignment/tree.rb
138
163
  - spec/bio-alignment_spec.rb
139
164
  - spec/spec_helper.rb
140
165
  - test/data/fasta/codon/aa-alignment.fa
@@ -158,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
158
183
  version: '0'
159
184
  segments:
160
185
  - 0
161
- hash: 1565072942973090495
186
+ hash: 1800672102634743595
162
187
  required_rubygems_version: !ruby/object:Gem::Requirement
163
188
  none: false
164
189
  requirements: