bio-alignment 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +5 -4
- data/README.md +94 -9
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/doc/bio-alignment-design.md +75 -11
- data/features/bioruby-feature.rb +17 -0
- data/features/bioruby.feature +6 -1
- data/features/columns-feature.rb +2 -0
- data/features/edit/del_bridges-feature.rb +7 -3
- data/features/edit/del_bridges.feature +1 -2
- data/features/edit/del_non_informative_sequences-feature.rb +26 -0
- data/features/edit/del_non_informative_sequences.feature +19 -0
- data/features/edit/del_short_sequences-feature.rb +21 -0
- data/features/edit/del_short_sequences.feature +25 -0
- data/features/edit/gblocks-feature.rb +2 -2
- data/features/edit/mask_islands-feature.rb +17 -4
- data/features/edit/mask_islands.feature +28 -17
- data/features/edit/mask_serial_mutations-feature.rb +8 -6
- data/features/edit/mask_serial_mutations.feature +11 -11
- data/features/tree-feature.rb +66 -0
- data/features/tree.feature +45 -0
- data/lib/bio-alignment.rb +4 -1
- data/lib/bio-alignment/alignment.rb +58 -3
- data/lib/bio-alignment/codonsequence.rb +14 -2
- data/lib/bio-alignment/columns.rb +102 -0
- data/lib/bio-alignment/edit/del_bridges.rb +18 -1
- data/lib/bio-alignment/edit/del_non_informative_sequences.rb +27 -0
- data/lib/bio-alignment/edit/del_short_sequences.rb +28 -0
- data/lib/bio-alignment/edit/edit_columns.rb +22 -0
- data/lib/bio-alignment/edit/edit_rows.rb +49 -0
- data/lib/bio-alignment/edit/mask_islands.rb +115 -0
- data/lib/bio-alignment/edit/mask_serial_mutations.rb +44 -0
- data/lib/bio-alignment/elements.rb +86 -0
- data/lib/bio-alignment/rows.rb +52 -0
- data/lib/bio-alignment/sequence.rb +20 -14
- data/lib/bio-alignment/state.rb +64 -8
- data/lib/bio-alignment/tree.rb +77 -0
- data/spec/bio-alignment_spec.rb +57 -1
- data/spec/spec_helper.rb +3 -3
- metadata +47 -22
- data/lib/bio-alignment/column.rb +0 -47
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'bio-alignment/state'
|
2
|
+
|
3
|
+
module Bio
|
4
|
+
|
5
|
+
module BioAlignment
|
6
|
+
|
7
|
+
# The Rows module provides accessors for the Row list
|
8
|
+
# returning Row objects
|
9
|
+
module Rows
|
10
|
+
|
11
|
+
# Return a copy of an alignment with the matching rows. The originating
|
12
|
+
# sequences should have methods 'empty_copy' and '<<'
|
13
|
+
def rows_where &block
|
14
|
+
seqs = []
|
15
|
+
rows.each do | seq |
|
16
|
+
seqs << seq.clone if block.call(seq)
|
17
|
+
end
|
18
|
+
Alignment.new(seqs)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
# Support the notion of Rows in an alignment. A Row
|
24
|
+
# can have state by attaching state objects
|
25
|
+
class Row
|
26
|
+
include State
|
27
|
+
|
28
|
+
def initialize aln, row
|
29
|
+
@aln = aln
|
30
|
+
@row = row
|
31
|
+
end
|
32
|
+
|
33
|
+
def count &block
|
34
|
+
counter = 0
|
35
|
+
each do | e |
|
36
|
+
found =
|
37
|
+
if e.kind_of?(String)
|
38
|
+
block.call(Element.new(e))
|
39
|
+
else
|
40
|
+
block.call(e)
|
41
|
+
end
|
42
|
+
counter += 1 if found
|
43
|
+
end
|
44
|
+
counter
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
@@ -1,20 +1,8 @@
|
|
1
1
|
module Bio
|
2
2
|
module BioAlignment
|
3
3
|
|
4
|
-
#
|
5
|
-
|
6
|
-
def initialize c
|
7
|
-
@c = c
|
8
|
-
end
|
9
|
-
def gap?
|
10
|
-
@c == '-'
|
11
|
-
end
|
12
|
-
def to_s
|
13
|
-
@c
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
# A Sequence is a simple container for String sequences/lists
|
4
|
+
# A Sequence is a simple and efficient container for String sequences. To
|
5
|
+
# add state to elements, unpack it into an Elements object with to_elements.
|
18
6
|
#
|
19
7
|
class Sequence
|
20
8
|
include Enumerable
|
@@ -34,6 +22,8 @@ module Bio
|
|
34
22
|
@seq.length
|
35
23
|
end
|
36
24
|
|
25
|
+
# Return each element in the Sequence as an Element object, so it
|
26
|
+
# can be queried for gap? and undefined?
|
37
27
|
def each
|
38
28
|
@seq.each_char { | c | yield Element.new(c) }
|
39
29
|
end
|
@@ -42,6 +32,22 @@ module Bio
|
|
42
32
|
@seq.to_s
|
43
33
|
end
|
44
34
|
|
35
|
+
def << element
|
36
|
+
@seq += element.to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def empty_copy
|
40
|
+
Sequence.new(@id,"")
|
41
|
+
end
|
42
|
+
|
43
|
+
def clone
|
44
|
+
Sequence.new(@id,@seq.clone)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Return Sequence (string) as an Elements object
|
48
|
+
def to_elements
|
49
|
+
Elements.new(@id,@seq)
|
50
|
+
end
|
45
51
|
end
|
46
52
|
end
|
47
53
|
end
|
data/lib/bio-alignment/state.rb
CHANGED
@@ -6,24 +6,80 @@ module Bio
|
|
6
6
|
attr_accessor :state
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
# any class you like
|
11
|
-
class ColumnState
|
9
|
+
module DeleteState
|
12
10
|
attr_accessor :deleted
|
13
11
|
|
12
|
+
def delete!
|
13
|
+
@deleted = true
|
14
|
+
end
|
15
|
+
|
14
16
|
def deleted?
|
15
|
-
deleted == true
|
17
|
+
@deleted == true
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_s
|
21
|
+
(deleted? ? 'X' : '.')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
module MarkState
|
26
|
+
attr_accessor :marked
|
27
|
+
|
28
|
+
def mark!
|
29
|
+
@marked = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def unmark!
|
33
|
+
@marked = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def marked?
|
37
|
+
@marked == true
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_s
|
41
|
+
(marked? ? 'X' : '.')
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
module MaskState
|
46
|
+
attr_accessor :masked
|
47
|
+
|
48
|
+
def mask!
|
49
|
+
@masked = true
|
50
|
+
end
|
51
|
+
|
52
|
+
def unmask!
|
53
|
+
@masked = false
|
54
|
+
end
|
55
|
+
|
56
|
+
def masked?
|
57
|
+
@masked == true
|
58
|
+
end
|
59
|
+
|
60
|
+
def to_s
|
61
|
+
(masked? ? 'X' : '.')
|
16
62
|
end
|
17
63
|
end
|
18
64
|
|
65
|
+
# Convenience class for tracking state. Note you can add
|
66
|
+
# any class you like
|
67
|
+
class ColumnState
|
68
|
+
include DeleteState
|
69
|
+
end
|
70
|
+
|
19
71
|
# Convenience class for tracking state. Note you can add
|
20
72
|
# any class you like
|
21
73
|
class RowState
|
22
|
-
|
74
|
+
include DeleteState
|
75
|
+
end
|
23
76
|
|
24
|
-
|
25
|
-
|
26
|
-
|
77
|
+
class ElementState
|
78
|
+
include MarkState
|
79
|
+
end
|
80
|
+
|
81
|
+
class ElementMaskedState
|
82
|
+
include MaskState
|
27
83
|
end
|
28
84
|
|
29
85
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Bio
|
2
|
+
|
3
|
+
module BioAlignment
|
4
|
+
|
5
|
+
# The Tree module turns a tree into a traversable object, by wrapping
|
6
|
+
# BioRuby's basic tree objects. The Bio::Tree object can always be
|
7
|
+
# fetched using to_bioruby_tree.
|
8
|
+
|
9
|
+
module Tree
|
10
|
+
|
11
|
+
class Node
|
12
|
+
end
|
13
|
+
|
14
|
+
# Make all nodes in the Bio::Tree aware of the tree object so we can use
|
15
|
+
# its methods
|
16
|
+
def Tree::init tree
|
17
|
+
if tree.kind_of?(Bio::Tree)
|
18
|
+
# walk all nodes and inject the tree info
|
19
|
+
tree.each_node do | node |
|
20
|
+
node.inject_tree(tree)
|
21
|
+
end
|
22
|
+
# tree.root.set_tree(tree)
|
23
|
+
else
|
24
|
+
raise "BioAlignment::Tree does not understand tree type "+tree.class.to_s
|
25
|
+
end
|
26
|
+
return tree
|
27
|
+
end
|
28
|
+
|
29
|
+
def root
|
30
|
+
@tree.root
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
# Here we add to BioRuby's Bio::Tree classes
|
38
|
+
class Tree
|
39
|
+
class Node
|
40
|
+
def inject_tree tree
|
41
|
+
@tree = tree
|
42
|
+
end
|
43
|
+
|
44
|
+
def leaf?
|
45
|
+
children.size == 0
|
46
|
+
end
|
47
|
+
|
48
|
+
def children
|
49
|
+
@tree.children(self)
|
50
|
+
end
|
51
|
+
|
52
|
+
def parent
|
53
|
+
@tree.parent(self)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Get the distance to another node (FIXME: write test)
|
57
|
+
def distance other
|
58
|
+
@tree.distance(self,other)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def find name
|
63
|
+
get_node_by_name(name)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Walk the ordered tree leaves, calling into the block, and return an array
|
67
|
+
def map
|
68
|
+
res = []
|
69
|
+
leaves.each do | leaf |
|
70
|
+
item = yield leaf
|
71
|
+
res << item
|
72
|
+
end
|
73
|
+
res
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
end
|
data/spec/bio-alignment_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'bio-alignment'
|
4
|
+
require 'bigbio' # for the FastaReader
|
4
5
|
include Bio::BioAlignment # Namespace
|
5
6
|
|
6
7
|
describe "BioAlignment::CodonSequence" do
|
@@ -49,6 +50,61 @@ describe "BioAlignment::Alignment" do
|
|
49
50
|
aln.sequences.first.seq[0].to_aa.should == "M"
|
50
51
|
aln.sequences.first.seq[2].to_aa.should == "T"
|
51
52
|
end
|
53
|
+
end
|
54
|
+
|
52
55
|
|
56
|
+
describe "BioAlignment::DelBridges" do
|
57
|
+
require 'bio-alignment/edit/del_bridges'
|
58
|
+
string =
|
59
|
+
"""
|
60
|
+
----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
|
61
|
+
SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
|
62
|
+
SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
|
63
|
+
----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
|
64
|
+
----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
|
65
|
+
----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
|
66
|
+
----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
|
67
|
+
----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
|
68
|
+
-------------IFHAVR-TC-HP-----------------
|
69
|
+
"""
|
70
|
+
aln = Alignment.new(string.split(/\n/))
|
71
|
+
print aln.to_s,"\n"
|
72
|
+
columns = aln.columns
|
73
|
+
columns.should_not == nil
|
74
|
+
columns.should_not == []
|
75
|
+
columns.size.should == 42
|
76
|
+
# make sure we are using the same columns
|
77
|
+
aln.columns.should == columns
|
78
|
+
aln.extend DelBridges
|
79
|
+
aln2 = aln.mark_bridges
|
80
|
+
print aln2.to_s,"\n"
|
81
|
+
columns2 = aln2.columns
|
82
|
+
columns2.should_not == nil
|
83
|
+
columns2.should_not == []
|
84
|
+
columns2.count { |col| col.state.deleted? }.should == 6
|
85
|
+
aln2.columns[0].state.should == columns2[0].state
|
86
|
+
aln2.columns[0].state.should_not == columns[0].state
|
87
|
+
aln2.rows.first.to_s.should == "----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV"
|
88
|
+
# now write out the alignment with deleted columns removed
|
89
|
+
aln3 = aln2.columns_where { |col| !col.state.deleted? }
|
90
|
+
print aln3.to_s,"\n"
|
91
|
+
aln3.rows.first.to_s.should == "SNSFSRPTIIFSGCSTACSGKSELVCGFRSFMLSDV"
|
53
92
|
end
|
54
93
|
|
94
|
+
describe "BioAlignment::DelBridges for codons" do
|
95
|
+
# We are going to do the same for a codon alignment
|
96
|
+
aln = Alignment.new
|
97
|
+
fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
|
98
|
+
fasta.each do | rec |
|
99
|
+
aln.sequences << CodonSequence.new(rec.id, rec.seq)
|
100
|
+
end
|
101
|
+
aln.extend DelBridges
|
102
|
+
aln2 = aln.mark_bridges
|
103
|
+
# print aln2[0].to_s,"\n"
|
104
|
+
aln2.columns.size.should == 404
|
105
|
+
# count deleted columns
|
106
|
+
aln2.columns.count { |col| col.state.deleted? }.should == 5
|
107
|
+
# create new alignment
|
108
|
+
aln3 = aln2.columns_where { |col| !col.state.deleted? }
|
109
|
+
aln3.columns.size.should == 399
|
110
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-03-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-logger
|
16
|
-
requirement: &
|
16
|
+
requirement: &26202820 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *26202820
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio
|
27
|
-
requirement: &
|
27
|
+
requirement: &26201340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,21 @@ dependencies:
|
|
32
32
|
version: 1.4.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *26201340
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rake
|
38
|
+
requirement: &26199400 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *26199400
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: bio-bigbio
|
38
|
-
requirement: &
|
49
|
+
requirement: &26197880 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>'
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: 0.1.3
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *26197880
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: cucumber
|
49
|
-
requirement: &
|
60
|
+
requirement: &26196760 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: '0'
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *26196760
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: rspec
|
60
|
-
requirement: &
|
71
|
+
requirement: &26195120 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ~>
|
@@ -65,29 +76,29 @@ dependencies:
|
|
65
76
|
version: 2.3.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *26195120
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: bundler
|
71
|
-
requirement: &
|
82
|
+
requirement: &26194620 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
|
-
- -
|
85
|
+
- - ! '>='
|
75
86
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.0.
|
87
|
+
version: 1.0.21
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *26194620
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: jeweler
|
82
|
-
requirement: &
|
93
|
+
requirement: &26193920 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
|
-
- -
|
96
|
+
- - ! '>='
|
86
97
|
- !ruby/object:Gem::Version
|
87
|
-
version:
|
98
|
+
version: '0'
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *26193920
|
91
102
|
description: Alignment handler for multiple sequence alignments (MSA)
|
92
103
|
email: pjotr.public01@thebird.nl
|
93
104
|
executables:
|
@@ -115,7 +126,10 @@ files:
|
|
115
126
|
- features/columns.feature
|
116
127
|
- features/edit/del_bridges-feature.rb
|
117
128
|
- features/edit/del_bridges.feature
|
129
|
+
- features/edit/del_non_informative_sequences-feature.rb
|
118
130
|
- features/edit/del_non_informative_sequences.feature
|
131
|
+
- features/edit/del_short_sequences-feature.rb
|
132
|
+
- features/edit/del_short_sequences.feature
|
119
133
|
- features/edit/gblocks-feature.rb
|
120
134
|
- features/edit/gblocks.feature
|
121
135
|
- features/edit/mask_islands-feature.rb
|
@@ -126,15 +140,26 @@ files:
|
|
126
140
|
- features/pal2nal.feature
|
127
141
|
- features/rows-feature.rb
|
128
142
|
- features/rows.feature
|
143
|
+
- features/tree-feature.rb
|
144
|
+
- features/tree.feature
|
129
145
|
- lib/bio-alignment.rb
|
130
146
|
- lib/bio-alignment/alignment.rb
|
131
147
|
- lib/bio-alignment/bioruby.rb
|
132
148
|
- lib/bio-alignment/codonsequence.rb
|
133
|
-
- lib/bio-alignment/
|
149
|
+
- lib/bio-alignment/columns.rb
|
134
150
|
- lib/bio-alignment/edit/del_bridges.rb
|
151
|
+
- lib/bio-alignment/edit/del_non_informative_sequences.rb
|
152
|
+
- lib/bio-alignment/edit/del_short_sequences.rb
|
153
|
+
- lib/bio-alignment/edit/edit_columns.rb
|
154
|
+
- lib/bio-alignment/edit/edit_rows.rb
|
155
|
+
- lib/bio-alignment/edit/mask_islands.rb
|
156
|
+
- lib/bio-alignment/edit/mask_serial_mutations.rb
|
157
|
+
- lib/bio-alignment/elements.rb
|
135
158
|
- lib/bio-alignment/pal2nal.rb
|
159
|
+
- lib/bio-alignment/rows.rb
|
136
160
|
- lib/bio-alignment/sequence.rb
|
137
161
|
- lib/bio-alignment/state.rb
|
162
|
+
- lib/bio-alignment/tree.rb
|
138
163
|
- spec/bio-alignment_spec.rb
|
139
164
|
- spec/spec_helper.rb
|
140
165
|
- test/data/fasta/codon/aa-alignment.fa
|
@@ -158,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
158
183
|
version: '0'
|
159
184
|
segments:
|
160
185
|
- 0
|
161
|
-
hash:
|
186
|
+
hash: 1800672102634743595
|
162
187
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
163
188
|
none: false
|
164
189
|
requirements:
|