bio-dbla-classifier 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +50 -29
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bio-dbla-classifier.gemspec +4 -4
- data/lib/bio/sequence/aa/aa.rb +57 -7
- metadata +15 -16
data/README.rdoc
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
= bio-dbla-classifier
|
2
2
|
|
3
|
-
DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
|
4
|
-
|
5
|
-
|
3
|
+
DBL-alpha tags are small regions of the PfEMP1 protein that can be PCR amplified and are classified into six expression groups depending on the number of cysteines and presence of
|
4
|
+
certain motifs within the tag region (Bull et al. 2007). This plugin extends BioRuby's amino acid class, Bio::Sequence::AA, by adding methods to analyze DBL-alpha sequence tags.
|
5
|
+
If you use this plugin, please cite:
|
6
6
|
Bull et al. 2007. “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
|
7
7
|
|
8
8
|
= Installation
|
9
|
+
You need to have Ruby installed on your system. This plugin has been tested on Ruby 1.9.2-p290. See http://rubylang.info/ for
|
10
|
+
information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed, type the following command in the terminal to
|
11
|
+
install the gem. This will also install the bioruby gem if it is not already installed on your system.
|
9
12
|
|
10
13
|
gem install bio-dbla-classifier
|
11
14
|
|
@@ -14,44 +17,62 @@ Bull et al “An approach to classifying sequence tags sampled from Plasmodium f
|
|
14
17
|
gem uninstall bio-dbla-classifier
|
15
18
|
|
16
19
|
= Usage
|
20
|
+
#create an instance of Bioruby's Bio::Sequence::AA class with methods to classify and describe DBL-alpha tags.
|
21
|
+
|
17
22
|
require 'bio-dbla-classifier'
|
18
23
|
|
19
|
-
|
20
|
-
|
24
|
+
seq ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
|
25
|
+
dbl_seq = Bio::Sequence::AA.new(seq)
|
26
|
+
|
27
|
+
#get the positions of limited variability
|
28
|
+
puts dbl_seq.polv1 #=> MFKS
|
29
|
+
puts dbl_seq.polv2 #=> LRED
|
30
|
+
puts dbl_seq.polv3 #=> KAIT
|
31
|
+
puts dbl_seq.polv4 #=> PTNL
|
32
|
+
|
33
|
+
#get the number of cysteines in the tag
|
34
|
+
puts dbl_seq.cys_count #=> 2
|
35
|
+
|
36
|
+
#get the distinct sequence identifier
|
37
|
+
puts dbl_seq.dsid #=>MFKS-LRED-KAIT-2-PTNL-115
|
38
|
+
|
39
|
+
#get the cyspolv group for this tag
|
40
|
+
puts dbl_seq.cyspolv_group #=> 1
|
41
|
+
|
42
|
+
|
43
|
+
#get the block sharing group for this tag
|
44
|
+
#puts dbl_seq.bs_group #to be implemented
|
45
|
+
|
46
|
+
#get the length of the tag
|
47
|
+
puts dbl_seq.size #=> 115
|
21
48
|
|
22
|
-
|
23
|
-
#seq = Bio::Sequence::AA.new(seq1)
|
49
|
+
= Finding the Position Specific Polymorphic Blocks (PSPB)
|
24
50
|
|
25
|
-
|
26
|
-
#puts seq.polv1
|
27
|
-
#puts seq.polv2
|
28
|
-
#puts seq.polv3
|
29
|
-
#puts seq.polv4
|
51
|
+
The pspb methods take two arguments: an anchor position and a window length that defines the length of the PSPB.
|
30
52
|
|
31
|
-
#get
|
32
|
-
|
53
|
+
#get pspb1
|
54
|
+
puts dbl_seq.pspb1(0,14) #=> NPEVEKGLKAVFRK
|
33
55
|
|
34
|
-
#get
|
35
|
-
|
56
|
+
#get pspb2
|
57
|
+
puts dbl_seq.pspb2(0,14) #=> THYADEDGSGNYVK
|
36
58
|
|
37
|
-
#get
|
38
|
-
|
59
|
+
#get pspb3
|
60
|
+
puts dbl_seq.pspb3(0,14) #=> CKAPQSVHYFIKTS
|
39
61
|
|
62
|
+
#get pspb4
|
63
|
+
puts dbl_seq.pspb4(0,14) #=> FTSHGKCGRNETNV
|
40
64
|
|
41
|
-
|
42
|
-
#puts seq.bs_group #to be implemented
|
65
|
+
= Processing fasta files
|
43
66
|
|
44
|
-
|
45
|
-
#puts seq.size
|
67
|
+
If the input is a fasta file,
|
46
68
|
|
47
|
-
|
48
|
-
#seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
|
69
|
+
seq_file = "sequences.fasta"
|
49
70
|
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
71
|
+
#Process each entry in the file
|
72
|
+
Bio::FlatFile.open(seq_file).each do |entry|
|
73
|
+
tag = Bio::Sequence::AA.new(entry.seq)
|
74
|
+
puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
75
|
+
end
|
55
76
|
|
56
77
|
= Copyright
|
57
78
|
|
data/Rakefile
CHANGED
@@ -17,8 +17,8 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "bio-dbla-classifier"
|
18
18
|
gem.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
19
19
|
gem.license = "Ruby"
|
20
|
-
gem.summary = %Q{
|
21
|
-
gem.description = %Q{
|
20
|
+
gem.summary = %Q{A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags}
|
21
|
+
gem.description = %Q{Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags}
|
22
22
|
gem.email = "georgkam@gmail.com"
|
23
23
|
gem.authors = ["George Githinji"]
|
24
24
|
# dependencies defined in Gemfile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0
|
data/bio-dbla-classifier.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-dbla-classifier"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["George Githinji"]
|
12
|
-
s.date = "2011-10-
|
13
|
-
s.description = "
|
12
|
+
s.date = "2011-10-13"
|
13
|
+
s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
14
14
|
s.email = "georgkam@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE.txt",
|
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
|
|
36
36
|
s.licenses = ["Ruby"]
|
37
37
|
s.require_paths = ["lib"]
|
38
38
|
s.rubygems_version = "1.8.10"
|
39
|
-
s.summary = "
|
39
|
+
s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
40
40
|
|
41
41
|
if s.respond_to? :specification_version then
|
42
42
|
s.specification_version = 3
|
data/lib/bio/sequence/aa/aa.rb
CHANGED
@@ -41,19 +41,23 @@ class Bio::Sequence::AA
|
|
41
41
|
elsif self =~ /VW/
|
42
42
|
polv2 = self[vw_pos - 12,4]
|
43
43
|
else
|
44
|
+
error = 'WW or VW motif missing'
|
44
45
|
end
|
45
|
-
polv2
|
46
|
+
polv2 unless error
|
46
47
|
end
|
47
48
|
|
48
49
|
#The third position of limited variability(polv3)
|
49
50
|
def polv3
|
50
51
|
if self =~ /WW/
|
51
|
-
|
52
|
+
polv3 = self[ww_pos + 10,4]
|
52
53
|
elsif self =~ /VW/
|
53
|
-
|
54
|
+
polv3 = self[vw_pos + 2,4]
|
54
55
|
else
|
56
|
+
error = 'WW or VW motif missing'
|
55
57
|
end
|
56
|
-
|
58
|
+
|
59
|
+
polv3 unless error
|
60
|
+
|
57
61
|
end
|
58
62
|
|
59
63
|
#The fourth position of limited variability(polv4)
|
@@ -81,19 +85,52 @@ class Bio::Sequence::AA
|
|
81
85
|
group
|
82
86
|
end
|
83
87
|
|
84
|
-
|
88
|
+
#position specific polymorphic block 1
|
89
|
+
def pspb1(anchor_pos,win_len)
|
90
|
+
self[14 + anchor_pos,win_len]
|
91
|
+
end
|
92
|
+
|
93
|
+
#position specific polymorphic block 2
|
94
|
+
def pspb2(anchor_pos,win_len)
|
95
|
+
if self =~ /WW/
|
96
|
+
pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
97
|
+
elsif self =~ /VW/
|
98
|
+
pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
99
|
+
else
|
100
|
+
error = 'WW or VW motif missing'
|
101
|
+
end
|
102
|
+
pspb2
|
103
|
+
end
|
104
|
+
|
105
|
+
#position specific polymorphic block 3
|
106
|
+
def pspb3(anchor_pos,win_len)
|
107
|
+
if self =~ /WW/
|
108
|
+
pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
|
109
|
+
elsif self =~ /VW/
|
110
|
+
pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
|
111
|
+
else
|
112
|
+
error = 'WW or VW motif missing'
|
113
|
+
end
|
114
|
+
pspb3
|
115
|
+
end
|
85
116
|
|
117
|
+
#position specific polymorphic block 4
|
118
|
+
def pspb4(anchor_pos,win_len)
|
119
|
+
self[self.length - 12 - win_len - anchor_pos, win_len]
|
120
|
+
end
|
121
|
+
|
122
|
+
|
123
|
+
private
|
86
124
|
def accepted_length
|
87
125
|
100..168
|
88
126
|
end
|
89
127
|
|
90
128
|
end
|
91
129
|
|
92
|
-
|
93
130
|
#create an instance of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
|
94
131
|
#to classify and describe Dbla properties
|
95
132
|
|
96
|
-
#seq1 =
|
133
|
+
#seq1 ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
|
97
134
|
#seq = Bio::Sequence::AA.new(seq1)
|
98
135
|
|
99
136
|
#get the positions of limited variability
|
@@ -118,6 +155,19 @@ end
|
|
118
155
|
#get the length of the tag
|
119
156
|
#puts seq.size
|
120
157
|
|
158
|
+
#get the pspb1
|
159
|
+
#puts seq.pspb1(0,14)
|
160
|
+
|
161
|
+
#get the pspb2
|
162
|
+
#puts seq.pspb2(0,14)
|
163
|
+
|
164
|
+
#get the pspb3
|
165
|
+
#puts seq.pspb3(0,14)
|
166
|
+
|
167
|
+
#get the pspb4
|
168
|
+
#puts seq.pspb4(0,14)
|
169
|
+
|
170
|
+
|
121
171
|
#if input file is a fasta file
|
122
172
|
#seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
|
123
173
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-dbla-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-10-
|
12
|
+
date: 2011-10-13 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &2155795540 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2155795540
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2155794860 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.3.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2155794860
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2155787960 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2155787960
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &2155786180 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.4
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2155786180
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rcov
|
60
|
-
requirement: &
|
60
|
+
requirement: &2155784980 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,9 +65,8 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
69
|
-
description:
|
70
|
-
approach described by Bull et al 2007
|
68
|
+
version_requirements: *2155784980
|
69
|
+
description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
71
70
|
email: georgkam@gmail.com
|
72
71
|
executables: []
|
73
72
|
extensions: []
|
@@ -104,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
103
|
version: '0'
|
105
104
|
segments:
|
106
105
|
- 0
|
107
|
-
hash:
|
106
|
+
hash: -1659999011793222104
|
108
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
108
|
none: false
|
110
109
|
requirements:
|
@@ -116,5 +115,5 @@ rubyforge_project:
|
|
116
115
|
rubygems_version: 1.8.10
|
117
116
|
signing_key:
|
118
117
|
specification_version: 3
|
119
|
-
summary:
|
118
|
+
summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
120
119
|
test_files: []
|