bio-dbla-classifier 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +50 -29
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bio-dbla-classifier.gemspec +4 -4
- data/lib/bio/sequence/aa/aa.rb +57 -7
- metadata +15 -16
data/README.rdoc
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
= bio-dbla-classifier
|
2
2
|
|
3
|
-
DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
|
4
|
-
|
5
|
-
|
3
|
+
DBL-alpha tags are small regions of the PfEMP1 protein that can be PCR amplified and are classified into six expression groups depending on the number of cysteines and presence of
|
4
|
+
certain motifs within the tag region (Bull et al 2007). This plugin extends bioruby's amino acid class, Bio::Sequence::AA, by adding methods to analyze DBL-alpha sequence tags.
|
5
|
+
If you use this plugin, please cite:
|
6
6
|
Bull et al “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July 2007): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
|
7
7
|
|
8
8
|
= Installation
|
9
|
+
You need to have Ruby installed on your system. This plugin has been tested on Ruby 1.9.2-p290. See http://rubylang.info/ for
|
10
|
+
information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed, type the following command in the terminal to
|
11
|
+
install the gem. This will install the bioruby gem if it is not already installed on your system.
|
9
12
|
|
10
13
|
gem install bio-dbla-classifier
|
11
14
|
|
@@ -14,44 +17,62 @@ Bull et al “An approach to classifying sequence tags sampled from Plasmodium f
|
|
14
17
|
gem uninstall bio-dbla-classifier
|
15
18
|
|
16
19
|
= Usage
|
20
|
+
#create an instance of Bioruby's Bio::Sequence::AA class with methods to classify and describe DBL-alpha tags.
|
21
|
+
|
17
22
|
require 'bio-dbla-classifier'
|
18
23
|
|
19
|
-
|
20
|
-
|
24
|
+
seq ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
|
25
|
+
dbl_seq = Bio::Sequence::AA.new(seq)
|
26
|
+
|
27
|
+
#get the positions of limited variability
|
28
|
+
puts dbl_seq.polv1 #=> MFKS
|
29
|
+
puts dbl_seq.polv2 #=> LRED
|
30
|
+
puts dbl_seq.polv3 #=> KAIT
|
31
|
+
puts dbl_seq.polv4 #=> PTNL
|
32
|
+
|
33
|
+
#get the number of cysteines in the tag
|
34
|
+
puts dbl_seq.cys_count #=> 2
|
35
|
+
|
36
|
+
#get the distinct sequence identifier
|
37
|
+
puts dbl_seq.dsid #=>MFKS-LRED-KAIT-2-PTNL-115
|
38
|
+
|
39
|
+
#get the cyspolv group for this tag
|
40
|
+
puts dbl_seq.cyspolv_group #=> 1
|
41
|
+
|
42
|
+
|
43
|
+
#get the block sharing group for this tag
|
44
|
+
#puts dbl_seq.bs_group #to be implemented
|
45
|
+
|
46
|
+
#get the length of the tag
|
47
|
+
puts dbl_seq.size #=> 115
|
21
48
|
|
22
|
-
|
23
|
-
#seq = Bio::Sequence::AA.new(seq1)
|
49
|
+
= Finding the Position Specific Polymorphic Blocks (PSPB)
|
24
50
|
|
25
|
-
|
26
|
-
#puts seq.polv1
|
27
|
-
#puts seq.polv2
|
28
|
-
#puts seq.polv3
|
29
|
-
#puts seq.polv4
|
51
|
+
The pspb methods take two arguments: an anchor position and a window length that defines the length of the PSPB.
|
30
52
|
|
31
|
-
#get
|
32
|
-
|
53
|
+
#get pspb1
|
54
|
+
puts dbl_seq.pspb1(0,14) #=> NPEVEKGLKAVFRK
|
33
55
|
|
34
|
-
#get
|
35
|
-
|
56
|
+
#get pspb2
|
57
|
+
puts dbl_seq.pspb2(0,14) #=> THYADEDGSGNYVK
|
36
58
|
|
37
|
-
#get
|
38
|
-
|
59
|
+
#get pspb3
|
60
|
+
puts dbl_seq.pspb3(0,14) #=> CKAPQSVHYFIKTS
|
39
61
|
|
62
|
+
#get pspb4
|
63
|
+
puts dbl_seq.pspb4(0,14) #=> FTSHGKCGRNETNV
|
40
64
|
|
41
|
-
|
42
|
-
#puts seq.bs_group #to be implemented
|
65
|
+
= Processing fasta files
|
43
66
|
|
44
|
-
|
45
|
-
#puts seq.size
|
67
|
+
If the input is a fasta file,
|
46
68
|
|
47
|
-
|
48
|
-
#seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
|
69
|
+
seq_file = "sequences.fasta"
|
49
70
|
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
71
|
+
#Process each entry in the file
|
72
|
+
Bio::FlatFile.open(seq_file).each do |entry|
|
73
|
+
tag = Bio::Sequence::AA.new(entry.seq)
|
74
|
+
puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
75
|
+
end
|
55
76
|
|
56
77
|
= Copyright
|
57
78
|
|
data/Rakefile
CHANGED
@@ -17,8 +17,8 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "bio-dbla-classifier"
|
18
18
|
gem.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
19
19
|
gem.license = "Ruby"
|
20
|
-
gem.summary = %Q{
|
21
|
-
gem.description = %Q{
|
20
|
+
gem.summary = %Q{A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags}
|
21
|
+
gem.description = %Q{Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags}
|
22
22
|
gem.email = "georgkam@gmail.com"
|
23
23
|
gem.authors = ["George Githinji"]
|
24
24
|
# dependencies defined in Gemfile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.4.0
|
data/bio-dbla-classifier.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-dbla-classifier"
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["George Githinji"]
|
12
|
-
s.date = "2011-10-
|
13
|
-
s.description = "
|
12
|
+
s.date = "2011-10-13"
|
13
|
+
s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
14
14
|
s.email = "georgkam@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE.txt",
|
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
|
|
36
36
|
s.licenses = ["Ruby"]
|
37
37
|
s.require_paths = ["lib"]
|
38
38
|
s.rubygems_version = "1.8.10"
|
39
|
-
s.summary = "
|
39
|
+
s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
40
40
|
|
41
41
|
if s.respond_to? :specification_version then
|
42
42
|
s.specification_version = 3
|
data/lib/bio/sequence/aa/aa.rb
CHANGED
@@ -41,19 +41,23 @@ class Bio::Sequence::AA
|
|
41
41
|
elsif self =~ /VW/
|
42
42
|
polv2 = self[vw_pos - 12,4]
|
43
43
|
else
|
44
|
+
error = 'WW or VW motif missing'
|
44
45
|
end
|
45
|
-
polv2
|
46
|
+
polv2 unless error
|
46
47
|
end
|
47
48
|
|
48
49
|
#The third position of limited variability(polv3)
|
49
50
|
def polv3
|
50
51
|
if self =~ /WW/
|
51
|
-
|
52
|
+
polv3 = self[ww_pos + 10,4]
|
52
53
|
elsif self =~ /VW/
|
53
|
-
|
54
|
+
polv3 = self[vw_pos + 2,4]
|
54
55
|
else
|
56
|
+
error = 'WW or VW motif missing'
|
55
57
|
end
|
56
|
-
|
58
|
+
|
59
|
+
polv3 unless error
|
60
|
+
|
57
61
|
end
|
58
62
|
|
59
63
|
#The fourth position of limited variability(polv4)
|
@@ -81,19 +85,52 @@ class Bio::Sequence::AA
|
|
81
85
|
group
|
82
86
|
end
|
83
87
|
|
84
|
-
|
88
|
+
#position specific polymorphic block 1
|
89
|
+
def pspb1(anchor_pos,win_len)
|
90
|
+
self[14 + anchor_pos,win_len]
|
91
|
+
end
|
92
|
+
|
93
|
+
#position specific polymorphic block 2
|
94
|
+
def pspb2(anchor_pos,win_len)
|
95
|
+
if self =~ /WW/
|
96
|
+
pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
97
|
+
elsif self =~ /VW/
|
98
|
+
pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
99
|
+
else
|
100
|
+
error = 'WW or VW motif missing'
|
101
|
+
end
|
102
|
+
pspb2
|
103
|
+
end
|
104
|
+
|
105
|
+
#position specific polymorphic block 3
|
106
|
+
def pspb3(anchor_pos,win_len)
|
107
|
+
if self =~ /WW/
|
108
|
+
pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
|
109
|
+
elsif self =~ /VW/
|
110
|
+
pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
|
111
|
+
else
|
112
|
+
error = 'WW or VW motif missing'
|
113
|
+
end
|
114
|
+
pspb3
|
115
|
+
end
|
85
116
|
|
117
|
+
#position specific polymorphic block 4
|
118
|
+
def pspb4(anchor_pos,win_len)
|
119
|
+
self[self.length - 12 - win_len - anchor_pos, win_len]
|
120
|
+
end
|
121
|
+
|
122
|
+
|
123
|
+
private
|
86
124
|
def accepted_length
|
87
125
|
100..168
|
88
126
|
end
|
89
127
|
|
90
128
|
end
|
91
129
|
|
92
|
-
|
93
130
|
#create an instance of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
|
94
131
|
#to classify and describe Dbla properties
|
95
132
|
|
96
|
-
#seq1 =
|
133
|
+
#seq1 ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
|
97
134
|
#seq = Bio::Sequence::AA.new(seq1)
|
98
135
|
|
99
136
|
#get the positions of limited variability
|
@@ -118,6 +155,19 @@ end
|
|
118
155
|
#get the length of the tag
|
119
156
|
#puts seq.size
|
120
157
|
|
158
|
+
#get the pspb1
|
159
|
+
#puts seq.pspb1(0,14)
|
160
|
+
|
161
|
+
#get the pspb2
|
162
|
+
#puts seq.pspb2(0,14)
|
163
|
+
|
164
|
+
#get the pspb3
|
165
|
+
#puts seq.pspb3(0,14)
|
166
|
+
|
167
|
+
#get the pspb4
|
168
|
+
#puts seq.pspb4(0,14)
|
169
|
+
|
170
|
+
|
121
171
|
#if input file is a fasta file
|
122
172
|
#seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
|
123
173
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-dbla-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-10-
|
12
|
+
date: 2011-10-13 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &2155795540 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2155795540
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2155794860 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.3.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2155794860
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2155787960 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2155787960
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &2155786180 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.4
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2155786180
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rcov
|
60
|
-
requirement: &
|
60
|
+
requirement: &2155784980 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,9 +65,8 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
69
|
-
description:
|
70
|
-
approach described by Bull et al 2007
|
68
|
+
version_requirements: *2155784980
|
69
|
+
description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
71
70
|
email: georgkam@gmail.com
|
72
71
|
executables: []
|
73
72
|
extensions: []
|
@@ -104,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
103
|
version: '0'
|
105
104
|
segments:
|
106
105
|
- 0
|
107
|
-
hash:
|
106
|
+
hash: -1659999011793222104
|
108
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
108
|
none: false
|
110
109
|
requirements:
|
@@ -116,5 +115,5 @@ rubyforge_project:
|
|
116
115
|
rubygems_version: 1.8.10
|
117
116
|
signing_key:
|
118
117
|
specification_version: 3
|
119
|
-
summary:
|
118
|
+
summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
120
119
|
test_files: []
|