bio-alignment 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +5 -4
- data/README.md +94 -9
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/doc/bio-alignment-design.md +75 -11
- data/features/bioruby-feature.rb +17 -0
- data/features/bioruby.feature +6 -1
- data/features/columns-feature.rb +2 -0
- data/features/edit/del_bridges-feature.rb +7 -3
- data/features/edit/del_bridges.feature +1 -2
- data/features/edit/del_non_informative_sequences-feature.rb +26 -0
- data/features/edit/del_non_informative_sequences.feature +19 -0
- data/features/edit/del_short_sequences-feature.rb +21 -0
- data/features/edit/del_short_sequences.feature +25 -0
- data/features/edit/gblocks-feature.rb +2 -2
- data/features/edit/mask_islands-feature.rb +17 -4
- data/features/edit/mask_islands.feature +28 -17
- data/features/edit/mask_serial_mutations-feature.rb +8 -6
- data/features/edit/mask_serial_mutations.feature +11 -11
- data/features/tree-feature.rb +66 -0
- data/features/tree.feature +45 -0
- data/lib/bio-alignment.rb +4 -1
- data/lib/bio-alignment/alignment.rb +58 -3
- data/lib/bio-alignment/codonsequence.rb +14 -2
- data/lib/bio-alignment/columns.rb +102 -0
- data/lib/bio-alignment/edit/del_bridges.rb +18 -1
- data/lib/bio-alignment/edit/del_non_informative_sequences.rb +27 -0
- data/lib/bio-alignment/edit/del_short_sequences.rb +28 -0
- data/lib/bio-alignment/edit/edit_columns.rb +22 -0
- data/lib/bio-alignment/edit/edit_rows.rb +49 -0
- data/lib/bio-alignment/edit/mask_islands.rb +115 -0
- data/lib/bio-alignment/edit/mask_serial_mutations.rb +44 -0
- data/lib/bio-alignment/elements.rb +86 -0
- data/lib/bio-alignment/rows.rb +52 -0
- data/lib/bio-alignment/sequence.rb +20 -14
- data/lib/bio-alignment/state.rb +64 -8
- data/lib/bio-alignment/tree.rb +77 -0
- data/spec/bio-alignment_spec.rb +57 -1
- data/spec/spec_helper.rb +3 -3
- metadata +47 -22
- data/lib/bio-alignment/column.rb +0 -47
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'bio-alignment/state'
|
2
|
+
|
3
|
+
module Bio
|
4
|
+
|
5
|
+
module BioAlignment
|
6
|
+
|
7
|
+
# The Rows module provides accessors for the Row list
|
8
|
+
# returning Row objects
|
9
|
+
module Rows
|
10
|
+
|
11
|
+
# Return a copy of an alignment with matching rows. The originating
|
12
|
+
# sequences should have methods 'empty_copy' and '<<'
|
13
|
+
def rows_where &block
|
14
|
+
seqs = []
|
15
|
+
rows.each do | seq |
|
16
|
+
seqs << seq.clone if block.call(seq)
|
17
|
+
end
|
18
|
+
Alignment.new(seqs)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
# Support the notion of Rows in an alignment. A Row
|
24
|
+
# can have state by attaching state objects
|
25
|
+
class Row
|
26
|
+
include State
|
27
|
+
|
28
|
+
def initialize aln, row
|
29
|
+
@aln = aln
|
30
|
+
@row = row
|
31
|
+
end
|
32
|
+
|
33
|
+
def count &block
|
34
|
+
counter = 0
|
35
|
+
each do | e |
|
36
|
+
found =
|
37
|
+
if e.kind_of?(String)
|
38
|
+
block.call(Element.new(e))
|
39
|
+
else
|
40
|
+
block.call(e)
|
41
|
+
end
|
42
|
+
counter += 1 if found
|
43
|
+
end
|
44
|
+
counter
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
@@ -1,20 +1,8 @@
|
|
1
1
|
module Bio
|
2
2
|
module BioAlignment
|
3
3
|
|
4
|
-
#
|
5
|
-
|
6
|
-
def initialize c
|
7
|
-
@c = c
|
8
|
-
end
|
9
|
-
def gap?
|
10
|
-
@c == '-'
|
11
|
-
end
|
12
|
-
def to_s
|
13
|
-
@c
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
# A Sequence is a simple container for String sequences/lists
|
4
|
+
# A Sequence is a simple and efficient container for String sequences. To
|
5
|
+
# add state to elements unpack it into an Elements object with to_elements.
|
18
6
|
#
|
19
7
|
class Sequence
|
20
8
|
include Enumerable
|
@@ -34,6 +22,8 @@ module Bio
|
|
34
22
|
@seq.length
|
35
23
|
end
|
36
24
|
|
25
|
+
# Return each element in the Sequence as an Element object, so it
|
26
|
+
# can be queried for gap? and undefined?
|
37
27
|
def each
|
38
28
|
@seq.each_char { | c | yield Element.new(c) }
|
39
29
|
end
|
@@ -42,6 +32,22 @@ module Bio
|
|
42
32
|
@seq.to_s
|
43
33
|
end
|
44
34
|
|
35
|
+
def << element
|
36
|
+
@seq += element.to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def empty_copy
|
40
|
+
Sequence.new(@id,"")
|
41
|
+
end
|
42
|
+
|
43
|
+
def clone
|
44
|
+
Sequence.new(@id,@seq.clone)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Return Sequence (string) as an Elements object
|
48
|
+
def to_elements
|
49
|
+
Elements.new(@id,@seq)
|
50
|
+
end
|
45
51
|
end
|
46
52
|
end
|
47
53
|
end
|
data/lib/bio-alignment/state.rb
CHANGED
@@ -6,24 +6,80 @@ module Bio
|
|
6
6
|
attr_accessor :state
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
# any class you like
|
11
|
-
class ColumnState
|
9
|
+
module DeleteState
|
12
10
|
attr_accessor :deleted
|
13
11
|
|
12
|
+
def delete!
|
13
|
+
@deleted = true
|
14
|
+
end
|
15
|
+
|
14
16
|
def deleted?
|
15
|
-
deleted == true
|
17
|
+
@deleted == true
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_s
|
21
|
+
(deleted? ? 'X' : '.')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
module MarkState
|
26
|
+
attr_accessor :marked
|
27
|
+
|
28
|
+
def mark!
|
29
|
+
@marked = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def unmark!
|
33
|
+
@marked = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def marked?
|
37
|
+
@marked == true
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_s
|
41
|
+
(marked? ? 'X' : '.')
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
module MaskState
|
46
|
+
attr_accessor :masked
|
47
|
+
|
48
|
+
def mask!
|
49
|
+
@masked = true
|
50
|
+
end
|
51
|
+
|
52
|
+
def unmask!
|
53
|
+
@masked = false
|
54
|
+
end
|
55
|
+
|
56
|
+
def masked?
|
57
|
+
@masked == true
|
58
|
+
end
|
59
|
+
|
60
|
+
def to_s
|
61
|
+
(masked? ? 'X' : '.')
|
16
62
|
end
|
17
63
|
end
|
18
64
|
|
65
|
+
# Convenience class for tracking state. Note you can add
|
66
|
+
# any class you like
|
67
|
+
class ColumnState
|
68
|
+
include DeleteState
|
69
|
+
end
|
70
|
+
|
19
71
|
# Convenience class for tracking state. Note you can add
|
20
72
|
# any class you like
|
21
73
|
class RowState
|
22
|
-
|
74
|
+
include DeleteState
|
75
|
+
end
|
23
76
|
|
24
|
-
|
25
|
-
|
26
|
-
|
77
|
+
class ElementState
|
78
|
+
include MarkState
|
79
|
+
end
|
80
|
+
|
81
|
+
class ElementMaskedState
|
82
|
+
include MaskState
|
27
83
|
end
|
28
84
|
|
29
85
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Bio
|
2
|
+
|
3
|
+
module BioAlignment
|
4
|
+
|
5
|
+
# The Tree module turns a tree into a traversable object, by wrapping
|
6
|
+
# BioRuby's basic tree objects. The Bio::Tree object can always be
|
7
|
+
# fetched using to_bioruby_tree.
|
8
|
+
|
9
|
+
module Tree
|
10
|
+
|
11
|
+
class Node
|
12
|
+
end
|
13
|
+
|
14
|
+
# Make all nodes in the Bio::Tree aware of the tree object so we can use
|
15
|
+
# its methods
|
16
|
+
def Tree::init tree
|
17
|
+
if tree.kind_of?(Bio::Tree)
|
18
|
+
# walk all nodes and inject the tree info
|
19
|
+
tree.each_node do | node |
|
20
|
+
node.inject_tree(tree)
|
21
|
+
end
|
22
|
+
# tree.root.set_tree(tree)
|
23
|
+
else
|
24
|
+
raise "BioAlignment::Tree does not understand tree type "+tree.class.to_s
|
25
|
+
end
|
26
|
+
return tree
|
27
|
+
end
|
28
|
+
|
29
|
+
def root
|
30
|
+
@tree.root
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
# Here we add to BioRuby's Bio::Tree classes
|
38
|
+
class Tree
|
39
|
+
class Node
|
40
|
+
def inject_tree tree
|
41
|
+
@tree = tree
|
42
|
+
end
|
43
|
+
|
44
|
+
def leaf?
|
45
|
+
children.size == 0
|
46
|
+
end
|
47
|
+
|
48
|
+
def children
|
49
|
+
@tree.children(self)
|
50
|
+
end
|
51
|
+
|
52
|
+
def parent
|
53
|
+
@tree.parent(self)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Get the distance to another node (FIXME: write test)
|
57
|
+
def distance other
|
58
|
+
@tree.distance(self,other)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def find name
|
63
|
+
get_node_by_name(name)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Walk the ordered tree leaves, calling into the block, and return an array
|
67
|
+
def map
|
68
|
+
res = []
|
69
|
+
leaves.each do | leaf |
|
70
|
+
item = yield leaf
|
71
|
+
res << item
|
72
|
+
end
|
73
|
+
res
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
end
|
data/spec/bio-alignment_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'bio-alignment'
|
4
|
+
require 'bigbio' # for the FastaReader
|
4
5
|
include Bio::BioAlignment # Namespace
|
5
6
|
|
6
7
|
describe "BioAlignment::CodonSequence" do
|
@@ -49,6 +50,61 @@ describe "BioAlignment::Alignment" do
|
|
49
50
|
aln.sequences.first.seq[0].to_aa.should == "M"
|
50
51
|
aln.sequences.first.seq[2].to_aa.should == "T"
|
51
52
|
end
|
53
|
+
end
|
54
|
+
|
52
55
|
|
56
|
+
describe "BioAlignment::DelBridges" do
|
57
|
+
require 'bio-alignment/edit/del_bridges'
|
58
|
+
string =
|
59
|
+
"""
|
60
|
+
----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV
|
61
|
+
SSIISNSFSRPTIIFSGCSTACSGK--SEQVCGFR---LSDV
|
62
|
+
SSIISNSFSRPTIIFSGCSTACSGKLTSEQVCGFR---LSDV
|
63
|
+
----PKLFSRPTIIFSGCSTACSGK--SEPVCGFRSFMLSDV
|
64
|
+
----------PTIIFSGCSKACSGKGLSELVCGFRSFMLSDV
|
65
|
+
----------PTIIFSGCSKACSGK-----FRSFRSFMLSAV
|
66
|
+
----------PTIIFSGCSKACSGK-----VCGIFHAVRSFM
|
67
|
+
----------PTIIFSGCSKACSGK--SELVCGFRSFMLSAV
|
68
|
+
-------------IFHAVR-TC-HP-----------------
|
69
|
+
"""
|
70
|
+
aln = Alignment.new(string.split(/\n/))
|
71
|
+
print aln.to_s,"\n"
|
72
|
+
columns = aln.columns
|
73
|
+
columns.should_not == nil
|
74
|
+
columns.should_not == []
|
75
|
+
columns.size.should == 42
|
76
|
+
# make sure we are using the same columns
|
77
|
+
aln.columns.should == columns
|
78
|
+
aln.extend DelBridges
|
79
|
+
aln2 = aln.mark_bridges
|
80
|
+
print aln2.to_s,"\n"
|
81
|
+
columns2 = aln2.columns
|
82
|
+
columns2.should_not == nil
|
83
|
+
columns2.should_not == []
|
84
|
+
columns2.count { |col| col.state.deleted? }.should == 6
|
85
|
+
aln2.columns[0].state.should == columns2[0].state
|
86
|
+
aln2.columns[0].state.should_not == columns[0].state
|
87
|
+
aln2.rows.first.to_s.should == "----SNSFSRPTIIFSGCSTACSGK--SELVCGFRSFMLSDV"
|
88
|
+
# now write out the alignment with deleted columns removed
|
89
|
+
aln3 = aln2.columns_where { |col| !col.state.deleted? }
|
90
|
+
print aln3.to_s,"\n"
|
91
|
+
aln3.rows.first.to_s.should == "SNSFSRPTIIFSGCSTACSGKSELVCGFRSFMLSDV"
|
53
92
|
end
|
54
93
|
|
94
|
+
describe "BioAlignment::DelBridges for codons" do
|
95
|
+
# We are going to do the same for a codon alignment
|
96
|
+
aln = Alignment.new
|
97
|
+
fasta = FastaReader.new('test/data/fasta/codon/codon-alignment.fa')
|
98
|
+
fasta.each do | rec |
|
99
|
+
aln.sequences << CodonSequence.new(rec.id, rec.seq)
|
100
|
+
end
|
101
|
+
aln.extend DelBridges
|
102
|
+
aln2 = aln.mark_bridges
|
103
|
+
# print aln2[0].to_s,"\n"
|
104
|
+
aln2.columns.size.should == 404
|
105
|
+
# count deleted columns
|
106
|
+
aln2.columns.count { |col| col.state.deleted? }.should == 5
|
107
|
+
# create new alignment
|
108
|
+
aln3 = aln2.columns_where { |col| !col.state.deleted? }
|
109
|
+
aln3.columns.size.should == 399
|
110
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-03-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-logger
|
16
|
-
requirement: &
|
16
|
+
requirement: &26202820 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *26202820
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio
|
27
|
-
requirement: &
|
27
|
+
requirement: &26201340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,21 @@ dependencies:
|
|
32
32
|
version: 1.4.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *26201340
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rake
|
38
|
+
requirement: &26199400 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *26199400
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: bio-bigbio
|
38
|
-
requirement: &
|
49
|
+
requirement: &26197880 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>'
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: 0.1.3
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *26197880
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: cucumber
|
49
|
-
requirement: &
|
60
|
+
requirement: &26196760 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: '0'
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *26196760
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: rspec
|
60
|
-
requirement: &
|
71
|
+
requirement: &26195120 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ~>
|
@@ -65,29 +76,29 @@ dependencies:
|
|
65
76
|
version: 2.3.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *26195120
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: bundler
|
71
|
-
requirement: &
|
82
|
+
requirement: &26194620 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
|
-
- -
|
85
|
+
- - ! '>='
|
75
86
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.0.
|
87
|
+
version: 1.0.21
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *26194620
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: jeweler
|
82
|
-
requirement: &
|
93
|
+
requirement: &26193920 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
|
-
- -
|
96
|
+
- - ! '>='
|
86
97
|
- !ruby/object:Gem::Version
|
87
|
-
version:
|
98
|
+
version: '0'
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *26193920
|
91
102
|
description: Alignment handler for multiple sequence alignments (MSA)
|
92
103
|
email: pjotr.public01@thebird.nl
|
93
104
|
executables:
|
@@ -115,7 +126,10 @@ files:
|
|
115
126
|
- features/columns.feature
|
116
127
|
- features/edit/del_bridges-feature.rb
|
117
128
|
- features/edit/del_bridges.feature
|
129
|
+
- features/edit/del_non_informative_sequences-feature.rb
|
118
130
|
- features/edit/del_non_informative_sequences.feature
|
131
|
+
- features/edit/del_short_sequences-feature.rb
|
132
|
+
- features/edit/del_short_sequences.feature
|
119
133
|
- features/edit/gblocks-feature.rb
|
120
134
|
- features/edit/gblocks.feature
|
121
135
|
- features/edit/mask_islands-feature.rb
|
@@ -126,15 +140,26 @@ files:
|
|
126
140
|
- features/pal2nal.feature
|
127
141
|
- features/rows-feature.rb
|
128
142
|
- features/rows.feature
|
143
|
+
- features/tree-feature.rb
|
144
|
+
- features/tree.feature
|
129
145
|
- lib/bio-alignment.rb
|
130
146
|
- lib/bio-alignment/alignment.rb
|
131
147
|
- lib/bio-alignment/bioruby.rb
|
132
148
|
- lib/bio-alignment/codonsequence.rb
|
133
|
-
- lib/bio-alignment/
|
149
|
+
- lib/bio-alignment/columns.rb
|
134
150
|
- lib/bio-alignment/edit/del_bridges.rb
|
151
|
+
- lib/bio-alignment/edit/del_non_informative_sequences.rb
|
152
|
+
- lib/bio-alignment/edit/del_short_sequences.rb
|
153
|
+
- lib/bio-alignment/edit/edit_columns.rb
|
154
|
+
- lib/bio-alignment/edit/edit_rows.rb
|
155
|
+
- lib/bio-alignment/edit/mask_islands.rb
|
156
|
+
- lib/bio-alignment/edit/mask_serial_mutations.rb
|
157
|
+
- lib/bio-alignment/elements.rb
|
135
158
|
- lib/bio-alignment/pal2nal.rb
|
159
|
+
- lib/bio-alignment/rows.rb
|
136
160
|
- lib/bio-alignment/sequence.rb
|
137
161
|
- lib/bio-alignment/state.rb
|
162
|
+
- lib/bio-alignment/tree.rb
|
138
163
|
- spec/bio-alignment_spec.rb
|
139
164
|
- spec/spec_helper.rb
|
140
165
|
- test/data/fasta/codon/aa-alignment.fa
|
@@ -158,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
158
183
|
version: '0'
|
159
184
|
segments:
|
160
185
|
- 0
|
161
|
-
hash:
|
186
|
+
hash: 1800672102634743595
|
162
187
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
163
188
|
none: false
|
164
189
|
requirements:
|