bio-dbla-classifier 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -1
- data/README.rdoc +15 -15
- data/VERSION +1 -1
- data/bio-dbla-classifier.gemspec +6 -6
- data/lib/bio/sequence/aa/aa.rb +137 -161
- data/spec/aa_spec.rb +44 -22
- metadata +15 -15
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -6,9 +6,8 @@ If you use this plugin please quote,
|
|
6
6
|
Bull et al. “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
|
7
7
|
|
8
8
|
= Installation
|
9
|
-
|
10
|
-
|
11
|
-
install the gem. This will install the bioruby gem if it is not already installed on your system.
|
9
|
+
Ruby must be installed on your system. See http://rubylang.info/ for information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed, type the following command in the terminal to
|
10
|
+
install the gem. This will install the bioruby gem if it is not already installed on your system. The plugin has been tested on Ruby 1.9.2-p290.
|
12
11
|
|
13
12
|
gem install bio-dbla-classifier
|
14
13
|
|
@@ -41,34 +40,36 @@ install the gem. This will install the bioruby gem if it is not already installe
|
|
41
40
|
|
42
41
|
|
43
42
|
#get the block sharing group for this tag
|
44
|
-
|
43
|
+
puts dbl_seq.bs_group #=> 1
|
45
44
|
|
46
45
|
#get the length of the tag
|
47
46
|
puts dbl_seq.size #=> 115
|
48
47
|
|
48
|
+
dbl_seq.is_var1? #=> false
|
49
|
+
|
49
50
|
= Finding the Position Specific Polymorphic Blocks(PSPB)
|
50
51
|
|
51
|
-
The pspb methods take 2 arguments, an anchor position and a window length that
|
52
|
+
The pspb methods take 2 arguments, an anchor position and a window length that
|
53
|
+
defines the length of the pspb. The default anchor position is 0 and the default
|
54
|
+
window length is 14
|
52
55
|
|
53
56
|
#get pspb1
|
54
|
-
puts seq.pspb1
|
57
|
+
puts seq.pspb1 #=> NPEVEKGLKAVFRK
|
55
58
|
|
56
59
|
#get pspb2
|
57
|
-
puts seq.pspb2
|
60
|
+
puts seq.pspb2 #=> THYADEDGSGNYVK
|
58
61
|
|
59
62
|
#get pspb3
|
60
|
-
puts seq.pspb3
|
63
|
+
puts seq.pspb3 #=> CKAPQSVHYFIKTS
|
61
64
|
|
62
65
|
#get pspb4
|
63
|
-
puts seq.pspb4
|
64
|
-
|
65
|
-
= Processing fasta files
|
66
|
+
puts seq.pspb4 #=> FTSHGKCGRNETNV
|
66
67
|
|
67
|
-
|
68
|
+
= Processing a flatfile, for example fasta, genbank or embl
|
68
69
|
|
69
70
|
seq_file = "sequences.fasta"
|
70
71
|
|
71
|
-
#
|
72
|
+
#for each entry in the file
|
72
73
|
Bio::FlatFile.open(seq_file).each do |entry|
|
73
74
|
tag = Bio::Sequence::AA.new(entry.seq)
|
74
75
|
puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
@@ -76,5 +77,4 @@ If the input is a fasta file,
|
|
76
77
|
|
77
78
|
= Copyright
|
78
79
|
|
79
|
-
See LICENSE.txt for further details
|
80
|
-
|
80
|
+
See LICENSE.txt for further details
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.5.0
|
data/bio-dbla-classifier.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-dbla-classifier"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["George Githinji"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2012-02-20"
|
13
13
|
s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
14
14
|
s.email = "georgkam@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -35,7 +35,7 @@ Gem::Specification.new do |s|
|
|
35
35
|
s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
36
36
|
s.licenses = ["Ruby"]
|
37
37
|
s.require_paths = ["lib"]
|
38
|
-
s.rubygems_version = "1.8.
|
38
|
+
s.rubygems_version = "1.8.12"
|
39
39
|
s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
40
40
|
|
41
41
|
if s.respond_to? :specification_version then
|
@@ -44,20 +44,20 @@ Gem::Specification.new do |s|
|
|
44
44
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
45
|
s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
|
46
46
|
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
47
|
-
s.add_development_dependency(%q<bundler>, ["~> 1.
|
47
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
48
48
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
49
49
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
50
50
|
else
|
51
51
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
52
52
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
53
|
-
s.add_dependency(%q<bundler>, ["~> 1.
|
53
|
+
s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
54
54
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
55
55
|
s.add_dependency(%q<rcov>, [">= 0"])
|
56
56
|
end
|
57
57
|
else
|
58
58
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
59
59
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
60
|
-
s.add_dependency(%q<bundler>, ["~> 1.
|
60
|
+
s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
61
61
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
62
62
|
s.add_dependency(%q<rcov>, [">= 0"])
|
63
63
|
end
|
data/lib/bio/sequence/aa/aa.rb
CHANGED
@@ -1,180 +1,156 @@
|
|
1
1
|
class Bio::Sequence::AA
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def start_motif
|
8
|
-
self[0,5]
|
9
|
-
end
|
10
|
-
|
11
|
-
def end_motif
|
12
|
-
self[-5,self.length]
|
13
|
-
end
|
14
|
-
|
15
|
-
def dsid
|
16
|
-
"#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
|
17
|
-
end
|
18
|
-
|
19
|
-
def ww_pos
|
20
|
-
rindex("WW")
|
21
|
-
end
|
22
|
-
|
23
|
-
def vw_pos
|
24
|
-
rindex("VW")
|
25
|
-
end
|
26
|
-
|
27
|
-
#number of cysteines
|
28
|
-
def cys_count
|
29
|
-
scan(/C/).size
|
30
|
-
end
|
31
|
-
|
32
|
-
#The first position of limited variability(polv1)
|
33
|
-
def polv1
|
34
|
-
self[10,4]
|
35
|
-
end
|
36
|
-
|
37
|
-
#The second position of limited variability(polv2)
|
38
|
-
def polv2
|
39
|
-
if self =~ /WW/
|
40
|
-
polv2 = self[ww_pos - 4,4]
|
41
|
-
elsif self =~ /VW/
|
42
|
-
polv2 = self[vw_pos - 12,4]
|
43
|
-
else
|
44
|
-
error = 'WW or VW motif missing'
|
45
|
-
end
|
46
|
-
polv2 unless error
|
47
|
-
end
|
48
|
-
|
49
|
-
#The third position of limited variability(polv3)
|
50
|
-
def polv3
|
51
|
-
if self =~ /WW/
|
52
|
-
polv3 = self[ww_pos + 10,4]
|
53
|
-
elsif self =~ /VW/
|
54
|
-
polv3 = self[vw_pos + 2,4]
|
55
|
-
else
|
56
|
-
error = 'WW or VW motif missing'
|
57
|
-
end
|
58
|
-
|
59
|
-
polv3 unless error
|
60
|
-
|
61
|
-
end
|
62
|
-
|
63
|
-
#The fourth position of limited variability(polv4)
|
64
|
-
def polv4
|
65
|
-
self[self.length - 12,4]
|
66
|
-
end
|
67
|
-
|
68
|
-
#Assigning dsid group based on cysteines coun and presence of
|
69
|
-
#REY motif in polv2, MFK in polv1,
|
70
|
-
def cyspolv_group
|
71
|
-
case
|
72
|
-
when cys_count > 4 || cys_count == 3 || cys_count < 2
|
73
|
-
group = 6
|
74
|
-
when cys_count == 4 && polv2 =~ /REY/i
|
75
|
-
group = 5
|
76
|
-
when cys_count == 4
|
77
|
-
group = 4
|
78
|
-
when cys_count == 2 && polv1 =~ /MFK/i
|
79
|
-
group = 1
|
80
|
-
when cys_count == 2 && polv2 =~ /REY/i
|
81
|
-
group =2
|
82
|
-
else
|
83
|
-
group = 3
|
84
|
-
end
|
85
|
-
group
|
86
|
-
end
|
87
|
-
|
88
|
-
#position specific polymorphic block 1
|
89
|
-
def pspb1(anchor_pos,win_len)
|
90
|
-
self[14 + anchor_pos,win_len]
|
91
|
-
end
|
92
|
-
|
93
|
-
#position specific polymorphic block 2
|
94
|
-
def pspb2(anchor_pos,win_len)
|
95
|
-
if self =~ /WW/
|
96
|
-
pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
97
|
-
elsif self =~ /VW/
|
98
|
-
pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
99
|
-
else
|
100
|
-
error = 'WW or VW motif missing'
|
101
|
-
end
|
102
|
-
pspb2
|
103
|
-
end
|
104
|
-
|
105
|
-
#position specific polymorphic block 3
|
106
|
-
def pspb3(anchor_pos,win_len)
|
107
|
-
if self =~ /WW/
|
108
|
-
pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
|
109
|
-
elsif self =~ /VW/
|
110
|
-
pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
|
111
|
-
else
|
112
|
-
error = 'WW or VW motif missing'
|
113
|
-
end
|
114
|
-
pspb3
|
115
|
-
end
|
116
|
-
|
117
|
-
#position specific polymorphic block 4
|
118
|
-
def pspb4(anchor_pos,win_len)
|
119
|
-
self[self.length - 12 - win_len - anchor_pos, win_len]
|
120
|
-
end
|
121
|
-
|
122
|
-
|
123
|
-
private
|
124
|
-
def accepted_length
|
125
|
-
100..168
|
126
|
-
end
|
3
|
+
def has_accepted_length?
|
4
|
+
true if accepted_length.include? self.length
|
5
|
+
end
|
127
6
|
|
128
|
-
|
7
|
+
def start_motif
|
8
|
+
self[0,5]
|
9
|
+
end
|
10
|
+
|
11
|
+
def end_motif
|
12
|
+
self[-5,self.length]
|
13
|
+
end
|
14
|
+
|
15
|
+
def ww_pos
|
16
|
+
rindex("WW")
|
17
|
+
end
|
18
|
+
|
19
|
+
def vw_pos
|
20
|
+
rindex("VW")
|
21
|
+
end
|
22
|
+
|
23
|
+
#number of cysteines
|
24
|
+
def cys_count
|
25
|
+
scan(/C/).size
|
26
|
+
end
|
27
|
+
|
28
|
+
#The first position of limited variability(polv1)
|
29
|
+
def polv1
|
30
|
+
self[10,4]
|
31
|
+
end
|
32
|
+
|
33
|
+
#The second position of limited variability(polv2)
|
34
|
+
def polv2
|
35
|
+
if !ww_missing?
|
36
|
+
return self[ww_pos - 4,4]
|
37
|
+
elsif !vw_missing?
|
38
|
+
return self[vw_pos - 12,4]
|
39
|
+
else
|
40
|
+
return '....'
|
41
|
+
end
|
42
|
+
end
|
129
43
|
|
130
|
-
#
|
131
|
-
|
44
|
+
#The third position of limited variability(polv3)
|
45
|
+
def polv3
|
46
|
+
if !ww_missing?
|
47
|
+
return self[ww_pos + 10,4]
|
48
|
+
elsif !vw_missing?
|
49
|
+
return self[vw_pos + 2,4]
|
50
|
+
else
|
51
|
+
return '....'
|
52
|
+
end
|
53
|
+
end
|
132
54
|
|
133
|
-
#
|
134
|
-
|
55
|
+
#The fourth position of limited variability(polv4)
|
56
|
+
def polv4
|
57
|
+
self[self.length - 12,4]
|
58
|
+
end
|
135
59
|
|
136
|
-
#
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
60
|
+
#Assigning dsid group based on number of cysteines, presence of REY motif in polv2 and MFK in polv1.
|
61
|
+
def cyspolv_group
|
62
|
+
case
|
63
|
+
when cys_count > 4 || cys_count == 3 || cys_count < 2
|
64
|
+
group = 6
|
65
|
+
when cys_count == 4 && polv2 =~ /REY/i
|
66
|
+
group = 5
|
67
|
+
when cys_count == 4
|
68
|
+
group = 4
|
69
|
+
when cys_count == 2 && polv1 =~ /MFK/i
|
70
|
+
group = 1
|
71
|
+
when cys_count == 2 && polv2 =~ /REY/i
|
72
|
+
group =2
|
73
|
+
else
|
74
|
+
group = 3
|
75
|
+
end
|
76
|
+
group
|
77
|
+
end
|
141
78
|
|
142
|
-
|
143
|
-
|
79
|
+
def is_var1_cp1?
|
80
|
+
return true if cyspolv_group == 1 && self =~ /NVHDKVEKGLREVF|NVHDKVETGLREVF/i
|
81
|
+
end
|
144
82
|
|
145
|
-
|
146
|
-
|
83
|
+
def is_var1_cp2?
|
84
|
+
return true if cyspolv_group == 2 && self =~ /APNKEKIKLEENLKK/i
|
85
|
+
end
|
147
86
|
|
148
|
-
|
149
|
-
|
87
|
+
def is_var1?
|
88
|
+
return true if is_var1_cp1? || is_var1_cp2?
|
89
|
+
end
|
150
90
|
|
91
|
+
def bs_group
|
92
|
+
case
|
93
|
+
when self =~ /(?:R(?:HYADHDKSGNYYK|NENNNLGKLSNEQ|I(?:RHYDDGSGNY(?:SK|YK)|THYN(?:GVSGNCVK|DGSGNYVK))|E(?:RYKDLKDVEIDD|HYKEVKNGNY(?:YK|IK)|KYKDLKD(?:VEIDD|LPIDD))|VKETYKDDPNYYK|KNNSSLRKLTNEQ)|G(?:G(?:RGRK(?:KLEDNL(?:IE|KE)|QLEENLQK)|GGRKKLEDNLKE)|I(?:N(?:D(?:Y(?:ND(?:GSGNYFK|ISGNYYK)|D(?:RDGPE(?:HYK|YYK)|GDGPEYYK))|CDRDGPEYYK)|AYNDGSENYYK)|IDYDHDGPHYYK)|P(?:SQEK(?:IKLEENLK|KKLEENLK)|KQEKKELEENLK)|K(?:NYPDDGSGN(?:YYK|FYK)|K(?:YYND(?:G(?:SGNYYK|TGNYVK)|ETGNYYK)|KEKEKIYGNIE)|QYYNDENGNYYK)|QTYPDDGSGNYYK)|MESNANLKKHTLER|S(?:STNTQCRCATNDV|HY(?:TDTHGSIDYDK|ED(?:GDKSGNYYK|KDKSGNYIK)|ADHDKSGNY(?:YK|LK))|NKEKEKIENSLQN|DYKDDD(?:GSGNYYK|IDGNYY(?:K|Q))|E(?:GKCGHKETERDL|KVEYGLRKLFKK)|VQERYGNDPNFFQ|KI(?:NDYDGDPNYYK|TDYDNDPNYYK)|QGQCGRNENNGYP|F(?:S(?:SEYCGH(?:RQGS(?:V|A)|GDNEV|EQGNV)|N(?:GQCGHRDENV|SKCGHGEHEV|DYCGH(?:RQGSV|NENKV)|PKCGH(?:G(?:DNEV|EHEV)|NENKV|EQGNV)|EYCGHRQGSV)|D(?:RKCGHYEGAP|HKCGH(?:GDKDV|YEGAP|DENAP)))|TN(?:GQCGHNEENV|PKCG(?:RGDNEV|H(?:G(?:DNEV|EHEV)|DENKV)))|ADAYCGRGDENV)|LILPYSKCGRDTD)|H(?:HYKDDDISGNYSK|SKEKEKLQTNLKN|N(?:H(?:IKKPLLENLEQ|KKKPLL(?:DNLEK|ENLEQ))|NKKKALLDNLEK|QKKINLEKSLHR)|EQG(?:YNKLEAI(?:SKT|LKT)|NNKLEA(?:RLKT|ILKT))|QQRK(?:RKLEENLRN|GKLEENLRN))|C(?:RAP(?:N(?:GANYFRKGL|EANYFKNVA)|KNAHYFIKSS|QKANYFKNVA)|G(?:TGENDTYFKNSS|A(?:G(?:EKDTYF(?:TYS(?:N|K)|VQLD)|A(?:RDEYFIKPS|KDTYFTYSK))|TMNDIFSKNIG|LPKSAY(?:VLQSE|F(?:MQLE|LQSE))))|S(?:VPYEAYYFTYKS|A(?:G(?:PKDTYFIKSG|QKDTYFIKPN)|D(?:GSE(?:DYFIKSS|EYF(?:IQSE|KKQS))|DSEDYFI(?:RSE|QSE))|P(?:RDA(?:DYFIKNS|QYFIKSS)|GD(?:VNYFRK(?:GL|IS|ES|FS)|AKYVK(?:YFP|NFP))|HNAQYVKYVP|Y(?:GANYYRKYS|HPGYFRQSK|YADYF(?:RKGS|K(?:SVA|KK(?:S|P)))|NAHYFIKSS|CADYFKKKS|DANYVRRKS|EA(?:YYFTYKS|QYFIKSS)|KSQYFIKSS)|D(?:YAKYFRQTC|NAKY(?:VKYFP|FKPPK)))|QNNEVYFINSE))|Y(?:IPY(?:YVNYFK(?:NIS|DIS|K(?:T(?:S|P)|KS))|CVNYFKNIS)|APNNANYFIGSG)|NAP(?:GD(?:VHYFRKDP|AHYFRKDP)|Y(?:DANY(?:YR(?:KYS|QTC)|VRRKS|FRKTS)|EAQY(?:YIKSS|FIK(?:SS|PS))|K(?:SRYF(?:M(?:HSE|QSE)|IQSE)|A(?:QYYIKSS|WYFMHSE)))|NISGYFMQS(?:G|E)|D(?:NVNYFRKYS|KAEYFVYKS))|TAP(?:YGANYYRKYS|D(?:N
VNYFRKYS|KANYFIYKS))|IAPRDAHYFLKSS|D(?:TEESDTYFKQSS|A(?:SYKSGYFMQSE|P(?:RDA(?:HYFLKSS|NFFIKNS|DYF(?:RKGS|KNVA))|YKSRYF(?:MQSE|IQSE)|KDANYFIGSG|QKVDYFRK(?:GS|IS))))|EA(?:SKNANFFIK(?:NS|DS)|P(?:GDAHYFRKGP|ENAYIIKRRI|KDANYFIGSG|QKVDYFRKG(?:S|L)))|VA(?:GEGNTYFIQLD|PENAYFRKTEA)|KA(?:S(?:RNA(?:HYFLKSS|NYFRK(?:IS|AL))|KNANFFIKNS)|P(?:GDVN(?:YFRKIS|FFIKNS)|NGANYFRKKS|TGA(?:HYFLKSS|DYF(?:VYKP|KKKS))|PKVDYSRNIS|EDADYFRKGS|K(?:GANYFRKES|DA(?:HYFLKSS|N(?:YFIGSG|FFIKIS)|DYFRKGS))|Q(?:GANYFRNIS|SVHYFIKTS|DANYF(?:R(?:N(?:IS|VS)|K(?:GL|IS))|TKES)|K(?:VDYFRKGS|ANYFRKGS)))|KEGDIYSKTTD)|FA(?:HNTEEYFIKSE|DGSEEYFI(?:KSS|QS(?:S|E)))|AAR(?:GNDLYSKNIG|YHPGYFKKSD))|Y(?:NERDR(?:DKKRKLQE|EKKRKLQD|AQKKKLQD)|DEKEKNRRKQLEN|KAP(?:RKA(?:NYFIYKS|DYFRNIS)|KDAHYFLKSS|QDANYFRNVS))|N(?:RKEKGKLQTNLKN|GDYK(?:EKVSNNL(?:RA|KT)|KKVSNNLKT)|HYKDD(?:NGS(?:GNYYK|ENYYK)|D(?:GSGNYYK|ISGNY(?:SK|YK)))|SDDKVE(?:NGLKKVF|KGLREVF)|Y(?:Y(?:NNTGNN(?:V(?:NYAK|DYVK)|ANYAK)|ADGDKSGNYYK)|NYDEDGSGNY(?:YK|VK))|N(?:HDNVE(?:NGL(?:REVF|KAVF)|KGLK(?:KVF|AVF))|D(?:ND(?:RVKKEKLQN|K(?:IKK(?:GKLRG|EKL(?:RG|QE))|VKKEKL(?:RG|QN)))|DDKIKKGKLRG|V(?:EKGL(?:DVVFKK|KVVFKK)|VKGLDVVFKK))|E(?:SE(?:IKRKEKLRG|KKKREELQG)|TDKEQKVKLEK|KDM(?:REKQKLQS|TEKQKLQS))|VDAVQEGLKVVF|KE(?:NEKLQENLKR|KEKIEKSLQN))|TVDK(?:IHEGLKVVF|VHEGLKVVF)|D(?:NVE(?:NGLREVFKK|KGLK(?:KVFDK|AVFRK))|DVEKGLKIVFEK|EDDVEKGLKIVF|K(?:D(?:YVENGLKKVF|A(?:V(?:RHGLKVVF|QKGLRAVF)|AQKVLRTVF))|EKDQRKKLDE(?:N|I)|VE(?:NGL(?:REVFKK|KKVFDK)|KGL(?:REVF(?:RK|KK)|QVVFGK)))|QD(?:DVEKGLKIVF|EVWNGLRSVF)|ADKV(?:EKGLQVVF|QKGLQVVF))|P(?:E(?:DKVHEGLKVVF|VE(?:NGLREVFNK|KGLKAVFRK))|QDKVQ(?:EGLK(?:NVF|VVF)|KGLREVF))|E(?:MVEIGLKKVFKK|HYKEVKNGNYVK|NVEKGL(?:K(?:IVFEK|KVFDK)|QVVFGK)|D(?:DKVQKGLQVVF|VEKGLKVVF(?:KK|QK))|EDAVQKGL(?:RAVF|K(?:VVF|KVF))|K(?:DAVQNGLKKVF|VE(?:YGLRKLFKK|IGLKKVF(?:DK|EK|KK)))|QDEVWKGLRDVF)|VH(?:YK(?:DDGS(?:GNYYK|ENYYK)|EVKNGNYVK)|DKVE(?:RGLREVF|TGLREVF|KGL(?:REVF|QVV(?:F|L))))|K(?:HDNIEKGLREVF|N(?:NVPL(?:HNLSLDK|DKLSLDK)|K(?:SPLDKLSLEQ|PPLDKLSVDK)|VEIGLK(?:NVFKN|KVFDK))|DDK(?:IEKSLRAIF|V(
?:EKGLRAIF|QKGL(?:RAVF|KAVF|QVVF)))|Q(?:RKKILQEKLEN|EKEKREKLDEN))|FSNPKCGH(?:DEGIV|KQGNV)|QEDKVQEGLKVVF|LILTHPKCGHDTD)|I(?:SYYNADEKGNFYK|HNYDDNGSGNYYK|E(?:TRY(?:GSDTTNYYQ|ENDGPNYYQ)|ARYKKDDDNYYQ)|VSFDQCGHND(?:MDV|VDV)|KNDKTLNNLSNGQ|FSNEHCGHKQGSV)|T(?:SEGQCGHNDKMRP|HYADEDGS(?:GNYVK|ENYYK)|D(?:N(?:D(?:EVW(?:TGLRSVF|KGL(?:RSVF|GSVF))|AVQKGLRAVF)|VE(?:NGLREVFKK|KGLRAVFGK))|K(?:D(?:YVENGLK(?:KVF|AVF)|DVENGLREVF|EV(?:KEGLKVVF|WKGLRAVF)|AVQKGLRAVF)|VENGLK(?:EVFDK|KVFD(?:N|K))))|E(?:GYCGRNENNGYP|TLYKDEEGNYLK|KDDVEKGLKIVF)|V(?:S(?:SNKCGHNDMDV|NAKRREGDENP|FDQCGHNDM(?:HV|DV))|K(?:GTYKDDPYYYK|ETYKDDPNYYK))|F(?:S(?:GYWCGHYEGAP|NDYCGHGEHEV)|TYTKCGHDENKV))|D(?:SRTDKLEENLRKI|NNSDKLRDLSVDK|DDVEKGLKIVFEK|K(?:GEKKKLEKNLKD|NRGKLGALSLDD))|P(?:SY(?:IKCGHNNKDDP|LKCGHNNKDDP)|HYTNDRGLADYVK)|E(?:HY(?:EDVDGSGNYLK|KDVDGSGNYYK)|YY(?:NDTNNKINYVK|EDKDPDKNYYQ)|KNY(?:YNDGTGNYYK|PDDGSGNYSK)|FT(?:GGYCGRDETDV|SGYCGRNETNV))|V(?:NGNDKLESNLKKI|KAHY(?:KKDAPYYYK|QKDAPNYYK)|FSNRQCGHYED(?:VP|AP))|K(?:RYYNDDTD(?:NN(?:FYQ|LYQ)|DNFYQ)|GINDYDGDPNYYK|SYY(?:NAD(?:GEGNFYK|EKGNYYK)|DADEKGNYYK)|HY(?:TDTHGSIDYDK|A(?:H(?:GDGSGNY(?:SK|YK|LK)|DDGS(?:GNYYK|VNYYK))|DEDGSGNYYK))|YYNDTNNKINYVK|N(?:Y(?:YNPD(?:G(?:SGNYYK|AGNYYK)|EAGNYYK)|NYD(?:EDGPEYYK|KDGPEYYK))|NDRTLNNLSIGQ|DYNPDGSGNY(?:YK|FK)|E(?:SE(?:IKRKEKLQR|K(?:RTKEKLQG|NTKKKLQG))|NTDLNKLTTEK)|KNTKLSTLTLEK|AYPDDG(?:SGNY(?:YK|FK)|FGNYYK))|T(?:SN(?:SN(?:MDTLSLEQ|LKELSLDK)|TNMNTLSLDK)|IYADLKDVEIDD)|I(?:NDYDGDGPEYYK|THYDDISGNYYK|KDYDGDGPEYYK)|D(?:HY(?:KDEKDGNFFQ|QDDGTGNYYK)|YYNADEKGNYYK)|PHYKDDGFGNYYK|E(?:YYQDDGTGNYYK|ISDYDNDPNYYK|EYGDLKDVPIDD|KYGDLKDVPIDD)|V(?:HYKENKDGNY(?:YK|VK)|KY(?:PDL(?:ND(?:IEIDD|VEIDD)|KDLQIDD)|QDLKDVEIDD))|K(?:HY(?:ENDTDKNYYQ|KKDEDPNYYK)|VYPEDVTGNY(?:YK|FK)|KKKGLSELSTEK)|FS(?:SDRCGHNEGDP|ERKCGH(?:NEGSP|DENAP))|Q(?:HYK(?:DDGSVNYYK|EDKDENYYK)|NNKKLKDLTDKH)|A(?:RYKDRKDPNYYK|K(?:YEDLKTLPIDD|ERYKDIKNYYQ)))|F(?:S(?:S(?:SGPCGRDEAPV|HGKCGHNEGAP|DRCGHNNNDGP|E(?:GKCGHKEGTV|YCGHYKNGDP)|QGQCGHTEGTV)|N(?:RGPCGRNETDV|S(?:G(?:TRGRKELTV|PCGRKELTV|KCGGKEAPV)|KCGHHNN
DGP)|N(?:GPCGRNETDV|KCGHSNGGDP)|PKCGHSNGGDP|E(?:HCGH(?:HNNDDP|YKNGDP)|YCGH(?:YKNGDP|KKNEDP)))|DNG(?:HCGRNETNV|PCGRKELIV))|T(?:RQGYCGHSETNV|GGGQC(?:RRNDNSV|GRNETDV)|S(?:HGKCG(?:RNETNV|HSEGAP)|IGKCGHNKGSV|EG(?:RCGHSETNV|KCG(?:RNETNV|HNDNRV)|Q(?:RGHSETNV|C(?:RRNDNSV|GH(?:SETNV|NDKSV|DENKV))))|VGYCGHNKG(?:SV|IR)|QG(?:YCG(?:RKE(?:LTV|APV)|HSETNV)|QCG(?:RNERNV|H(?:SETNV|TEGTV|KEGTV)))|AGKCR(?:RNDNSV|HNDNSV))|NDGKCG(?:RYEGAP|H(?:YE(?:GAP|NNI|D(?:NV|AP))|TEGTV))|TEGYCGR(?:NEGAP|DEGAP)|D(?:GHCGRTQEGHV|IGKCG(?:GKEAPV|H(?:GDKDV|N(?:EGAP|KGSV)|KQGNV))|DGKCGHYEGAP))|ENAGKC(?:RRNDNKV|GHNDNRV)|WDRKCGHSN(?:GGDP|EGA(?:P|L))|LYPKCGHNNKNDL)|Q(?:GIIDYDNDPNYYK|NYY(?:KDDPKKNYYK|ADDGSGNY(?:SK|VK))|ISDY(?:TGDHPNYYK|DGDGPEYYK)|K(?:SDSSLQRLSIEK|HYEDDGSGNYYK|NNSALKKLTDKQ|IY(?:EDINNLPIDD|KDLNNLPIDD))|QNNNTLENLTDKQ)|L(?:K(?:TRYKKDDDNYYQ|KHYQKDAPNYYK)|F(?:S(?:N(?:RQCGH(?:GEHEV|NEGAP|DENKV|EQGNV)|SKCGH(?:RQGNV|DE(?:SKV|NKV)|EQGNV)|YKCGHYEDAP|DYCGHKQGNV|PKCGH(?:EQG(?:NV|TV)|KQGKV)|AYCGHYEGSP)|D(?:GHCGNKDGTV|HKCGHEESRV|YKCGHYE(?:GSP|DAP)))|DYNCGHHKDNNV|WDRKCGHDERNV)|Q(?:TRYTNDGDNYFK|ERYNDPKGDFFQ|ARYKKDGDDFFK)|WNDKCGHHVDKDV|LFSNYKCGHYEGS)|A(?:RYKKDEEDGNYY(?:K|Q)|VSSNKCGHNDMNV|KNDYTGDHPNYYK|LKHYKDDTKNYYQ))/
|
94
|
+
block_sharing = 1
|
95
|
+
when self =~ /(?:RE(?:PGKQHLEERLER|KGKSRLEARLKT)|G(?:G(?:HYKNCHCIGGDV|TYKNCRCASGNV)|P(?:NQEKKLLENKLK|DQEKKKLEENLR)|ANAIKAGDNVSIV)|S(?:FTNGQCGRDGENV|WYPKCGHHVKQDV|AKEHYQDTENYYK)|H(?:NRRKEKLETRLEE|E(?:PG(?:IQ(?:HLEKRLES|YLEKRLES)|KQHL(?:GERLEQ|EERLE(?:R|Q)))|QG(?:YNRLEARLKT|NNRLEARLKT|INRLEARLKT))|QQRKHLLEKRLET)|C(?:RAEEK(?:GTYFKNRE|D(?:TYFKNRE|IYSKTTD)|EIYSKTTD)|G(?:TEDKDTYFIKSG|VEENAKYFRESS|A(?:GMKDIYSKTMN|SEDAKYKVIGP|T(?:MNDIFSKNIR|VDDI(?:SSKNIR|FSKNIR))|P(?:SDAQYFRNTC|KEAKYFRKTA)))|HAPPDAQYTKKGP|NA(?:P(?:TGADYFVYKP|KDANYFEYNS)|WGNTYFRKTCS)|DAG(?:QKDTYFKQSS|AADEYFKKSG)|EA(?:GTSDKYFRKTA|KSDDKYNVIGP)|KA(?:NDDAEYFRKKD|P(?:DKANYF(?:EPPK|KPPK)|EEDHYFKPAQ)|EVDDIYSKTAN|K(?:EGDIYSKT(?:MN|AN)|KG(?:GIYSKTMN|DIYSKTMN))))|Y(?:SQKYKDEKSKLEE|NE(?:TDKVQKAILQQ|KDQEEKRKLQE))|N(?:YYEDNDTDKNYYQ|NNAAKLSELSTAQ|THESAQRKKLEEN|DEEKKKRDELEKN|KN(?:NPPLYKLSLEK|KSPLDKLSLDK))|T(?:RYKKDDEDGNFFQ|PTQGKCHCIDGT(?:N|V)|L(?:FDYKCGHDENAP|WNEKCGHGDYNL))|I(?:EHYKDDPEENFYE|KS(?:NYNDSEGNYFK|QYDDNEGNYFK)|LFDYKCAHDNDKV)|D(?:RKEKVKLEENLKN|KGEKKKLEENLKN|Q(?:ERK(?:HLLEKRLET|QHLEKRLET)|QEK(?:LYLENNLKK|AKLENNLKR))|AKKHYGDDENYYK)|P(?:NKCRCEDANADQV|CSVQKCTCINGDP)|E(?:SN(?:M(?:GQCRCFSGDP|VQCRCFSGDP)|KGQCRCFSGDP)|THGYCRCVNRVDV|F(?:SGGKCGHKDNNV|T(?:GGQCGRDGENV|DGHCGH(?:RQGNV|NEENV))))|VKDRYQNDGPDFFK|K(?:S(?:SYNDDGTGNYFK|YYKNDNDRNYFK)|N(?:HYNDTSKNYYK|NNN(?:ELNNLSLDK|KLSNLSTKE))|D(?:HYKGDEANNYFQ|NNTKLNDLSIQE)|K(?:TNPALKSFTNEE|KLEENLRNIFKN)|FSNPKCGHNEGSP)|F(?:SNDQCGHNN(?:RGDP|GGAP)|WYPKC(?:GHHV(?:RQDV|KQ(?:DV|EV))|SHHVKQDV))|Q(?:RNNIKLQ(?:NIPLHE|TLTLHQ)|N(?:NNTKLQNIPLHE|KNENLKSLSLDK)|KENGDINTLKPEE)|L(?:FYYKCGHYVYKDV|WN(?:Y(?:NCGHHVN(?:RDV|QDV)|KCGHHVNQDV)|DKCGHHVKQDV))|ARDHYNDTSGNYYQ)/
|
96
|
+
block_sharing = 2
|
97
|
+
else
|
98
|
+
block_sharing = 0
|
99
|
+
end
|
100
|
+
block_sharing
|
101
|
+
end
|
151
102
|
|
152
|
-
#
|
153
|
-
|
103
|
+
#distinct sequence identifier (DSID)
|
104
|
+
def dsid
|
105
|
+
"#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
|
106
|
+
end
|
154
107
|
|
155
|
-
#
|
156
|
-
|
108
|
+
#position specific polymorphic block 1
|
109
|
+
def pspb1(anchor_pos=0,win_len=14)
|
110
|
+
self[14 + anchor_pos,win_len]
|
111
|
+
end
|
157
112
|
|
158
|
-
#
|
159
|
-
|
113
|
+
#position specific polymorphic block 2
|
114
|
+
def pspb2(anchor_pos=0,win_len=14)
|
115
|
+
if !ww_missing?
|
116
|
+
return self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
117
|
+
elsif !vw_missing?
|
118
|
+
return self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
119
|
+
else
|
120
|
+
return '....'
|
121
|
+
end
|
122
|
+
end
|
160
123
|
|
161
|
-
#
|
162
|
-
|
124
|
+
#position specific polymorphic block 3
|
125
|
+
def pspb3(anchor_pos=0,win_len=14)
|
126
|
+
if !ww_missing?
|
127
|
+
return self[ww_pos + 14 + anchor_pos, win_len]
|
128
|
+
elsif !vw_missing?
|
129
|
+
return self[vw_pos + 6 + anchor_pos, win_len]
|
130
|
+
else
|
131
|
+
return '....'
|
132
|
+
end
|
133
|
+
end
|
163
134
|
|
164
|
-
#
|
165
|
-
|
135
|
+
#position specific polymorphic block 4
|
136
|
+
def pspb4(anchor_pos=0,win_len=14)
|
137
|
+
self[self.length - 12 - win_len - anchor_pos, win_len]
|
138
|
+
end
|
166
139
|
|
167
|
-
|
168
|
-
|
140
|
+
private
|
141
|
+
def accepted_length
|
142
|
+
100..168
|
143
|
+
end
|
169
144
|
|
145
|
+
def ww_missing?
|
146
|
+
true unless self =~ /WW/i
|
147
|
+
end
|
170
148
|
|
171
|
-
|
172
|
-
|
149
|
+
def vw_missing?
|
150
|
+
true unless self =~ /VW/i
|
151
|
+
end
|
173
152
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
#puts tag.end_motif
|
179
|
-
#puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
180
|
-
#end
|
153
|
+
def vw_ww_missing?
|
154
|
+
true if ww_missing? && vw_missing?
|
155
|
+
end
|
156
|
+
end
|
data/spec/aa_spec.rb
CHANGED
@@ -1,32 +1,54 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "Dbla" do
|
4
|
-
context '
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
it "should return the number of cysteines" do
|
11
|
-
@tag.cys_count.should == 4
|
12
|
-
end
|
4
|
+
context 'Group4 Dbla tag' do
|
5
|
+
before(:each) do
|
6
|
+
seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
|
7
|
+
@tag = Bio::Sequence::AA.new(seq)
|
8
|
+
end
|
13
9
|
|
14
|
-
|
15
|
-
|
16
|
-
|
10
|
+
it "should return the number of cysteines" do
|
11
|
+
@tag.cys_count.should == 4
|
12
|
+
end
|
17
13
|
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
it 'should return a dsid' do
|
15
|
+
@tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
|
16
|
+
end
|
21
17
|
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
it 'should return the cyspolv group' do
|
19
|
+
@tag.cyspolv_group.should == 4
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should return the length' do
|
23
|
+
@tag.length.should == 126
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should return the start motif' do
|
27
|
+
@tag.start_motif.should == 'YIGDI'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should return false for var1' do
|
31
|
+
@tag.is_var1?.should be_false
|
32
|
+
end
|
25
33
|
|
26
|
-
it 'should return the start motif' do
|
27
|
-
@tag.start_motif == 'YIGDI'
|
28
34
|
end
|
29
35
|
|
30
|
-
|
31
|
-
|
36
|
+
context 'Group2 Dbla tag' do
|
37
|
+
before(:each) do
|
38
|
+
seq = 'DIGDIVRGTDLFLGGPSQEKKKLEENLKKILENIKNKNTKLSTLTLEKVREYWWALNRNDVWKALTCSAPYEAQYFIKSSDKEHSFSSEYCGHHNNDDPLTNLDYVPQFLR'
|
39
|
+
@tag2 = Bio::Sequence::AA.new(seq)
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should return the number of cysteines' do
|
43
|
+
@tag2.cys_count.should == 2
|
44
|
+
end
|
32
45
|
|
46
|
+
it 'should return the block sharing group 1' do
|
47
|
+
@tag2.bs_group.should == 1
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'should return false for cp2 var1' do
|
51
|
+
@tag2.is_var1_cp2?.should be_false
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-dbla-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-02-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152850760 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152850760
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2152849320 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,21 +32,21 @@ dependencies:
|
|
32
32
|
version: 2.3.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152849320
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2152848240 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 1.
|
43
|
+
version: 1.1.rc.7
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152848240
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &2152846960 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.4
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152846960
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rcov
|
60
|
-
requirement: &
|
60
|
+
requirement: &2152855080 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2152855080
|
69
69
|
description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
70
70
|
email: georgkam@gmail.com
|
71
71
|
executables: []
|
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
103
103
|
version: '0'
|
104
104
|
segments:
|
105
105
|
- 0
|
106
|
-
hash:
|
106
|
+
hash: 3330781070760048632
|
107
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
108
|
none: false
|
109
109
|
requirements:
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
114
|
rubyforge_project:
|
115
|
-
rubygems_version: 1.8.
|
115
|
+
rubygems_version: 1.8.12
|
116
116
|
signing_key:
|
117
117
|
specification_version: 3
|
118
118
|
summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags
|