bio-dbla-classifier 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -1
- data/README.rdoc +15 -15
- data/VERSION +1 -1
- data/bio-dbla-classifier.gemspec +6 -6
- data/lib/bio/sequence/aa/aa.rb +137 -161
- data/spec/aa_spec.rb +44 -22
- metadata +15 -15
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.rdoc
CHANGED
@@ -6,9 +6,8 @@ If you use this plugin please quote,
|
|
6
6
|
Bull et al “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
|
7
7
|
|
8
8
|
= Installation
|
9
|
-
|
10
|
-
|
11
|
-
install the gem. This will install the bioruby gem if it is not already installed on your system.
|
9
|
+
Ruby must be installed on your system. See http://rubylang.info/ for information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed, type the following command in the terminal to
|
10
|
+
install the gem. This will install the bioruby gem if it is not already installed on your system. The plugin has been tested on Ruby 1.9.2-p290.
|
12
11
|
|
13
12
|
gem install bio-dbla-classifier
|
14
13
|
|
@@ -41,34 +40,36 @@ install the gem. This will install the bioruby gem if it is not already installe
|
|
41
40
|
|
42
41
|
|
43
42
|
#get the block sharing group for this tag
|
44
|
-
|
43
|
+
puts dbl_seq.bs_group #=> 1
|
45
44
|
|
46
45
|
#get the length of the tag
|
47
46
|
puts dbl_seq.size #=> 115
|
48
47
|
|
48
|
+
dbl_seq.is_var1? #=> false
|
49
|
+
|
49
50
|
= Finding the Position Specific Polymorphic Blocks(PSPB)
|
50
51
|
|
51
|
-
The pspb methods take 2 arguments, an anchor position and a window length that
|
52
|
+
The pspb methods take 2 arguments, an anchor position and a window length that
|
53
|
+
defines the length of the pspb. The default anchor position is 0 and the default
|
54
|
+
window length is 14
|
52
55
|
|
53
56
|
#get pspb1
|
54
|
-
puts seq.pspb1
|
57
|
+
puts seq.pspb1 #=> NPEVEKGLKAVFRK
|
55
58
|
|
56
59
|
#get pspb2
|
57
|
-
puts seq.pspb2
|
60
|
+
puts seq.pspb2 #=> THYADEDGSGNYVK
|
58
61
|
|
59
62
|
#get pspb3
|
60
|
-
puts seq.pspb3
|
63
|
+
puts seq.pspb3 #=> CKAPQSVHYFIKTS
|
61
64
|
|
62
65
|
#get pspb4
|
63
|
-
puts seq.pspb4
|
64
|
-
|
65
|
-
= Processing fasta files
|
66
|
+
puts seq.pspb4 #=> FTSHGKCGRNETNV
|
66
67
|
|
67
|
-
|
68
|
+
= Processing a flatfile, for example fasta, genbank or embl
|
68
69
|
|
69
70
|
seq_file = "sequences.fasta"
|
70
71
|
|
71
|
-
#
|
72
|
+
#for each entry in the file
|
72
73
|
Bio::FlatFile.open(seq_file).each do |entry|
|
73
74
|
tag = Bio::Sequence::AA.new(entry.seq)
|
74
75
|
puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
@@ -76,5 +77,4 @@ If the input is a fasta file,
|
|
76
77
|
|
77
78
|
= Copyright
|
78
79
|
|
79
|
-
See LICENSE.txt for further details
|
80
|
-
|
80
|
+
See LICENSE.txt for further details
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/bio-dbla-classifier.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-dbla-classifier"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["George Githinji"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2012-02-20"
|
13
13
|
s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
14
14
|
s.email = "georgkam@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -35,7 +35,7 @@ Gem::Specification.new do |s|
|
|
35
35
|
s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
36
36
|
s.licenses = ["Ruby"]
|
37
37
|
s.require_paths = ["lib"]
|
38
|
-
s.rubygems_version = "1.8.
|
38
|
+
s.rubygems_version = "1.8.12"
|
39
39
|
s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
|
40
40
|
|
41
41
|
if s.respond_to? :specification_version then
|
@@ -44,20 +44,20 @@ Gem::Specification.new do |s|
|
|
44
44
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
45
|
s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
|
46
46
|
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
47
|
-
s.add_development_dependency(%q<bundler>, ["~> 1.
|
47
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
48
48
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
49
49
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
50
50
|
else
|
51
51
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
52
52
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
53
|
-
s.add_dependency(%q<bundler>, ["~> 1.
|
53
|
+
s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
54
54
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
55
55
|
s.add_dependency(%q<rcov>, [">= 0"])
|
56
56
|
end
|
57
57
|
else
|
58
58
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
59
59
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
60
|
-
s.add_dependency(%q<bundler>, ["~> 1.
|
60
|
+
s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
|
61
61
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
62
62
|
s.add_dependency(%q<rcov>, [">= 0"])
|
63
63
|
end
|
data/lib/bio/sequence/aa/aa.rb
CHANGED
@@ -1,180 +1,156 @@
|
|
1
1
|
class Bio::Sequence::AA
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def start_motif
|
8
|
-
self[0,5]
|
9
|
-
end
|
10
|
-
|
11
|
-
def end_motif
|
12
|
-
self[-5,self.length]
|
13
|
-
end
|
14
|
-
|
15
|
-
def dsid
|
16
|
-
"#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
|
17
|
-
end
|
18
|
-
|
19
|
-
def ww_pos
|
20
|
-
rindex("WW")
|
21
|
-
end
|
22
|
-
|
23
|
-
def vw_pos
|
24
|
-
rindex("VW")
|
25
|
-
end
|
26
|
-
|
27
|
-
#number of cysteines
|
28
|
-
def cys_count
|
29
|
-
scan(/C/).size
|
30
|
-
end
|
31
|
-
|
32
|
-
#The first position of limited variability(polv1)
|
33
|
-
def polv1
|
34
|
-
self[10,4]
|
35
|
-
end
|
36
|
-
|
37
|
-
#The second position of limited variability(polv2)
|
38
|
-
def polv2
|
39
|
-
if self =~ /WW/
|
40
|
-
polv2 = self[ww_pos - 4,4]
|
41
|
-
elsif self =~ /VW/
|
42
|
-
polv2 = self[vw_pos - 12,4]
|
43
|
-
else
|
44
|
-
error = 'WW or VW motif missing'
|
45
|
-
end
|
46
|
-
polv2 unless error
|
47
|
-
end
|
48
|
-
|
49
|
-
#The third position of limited variability(polv3)
|
50
|
-
def polv3
|
51
|
-
if self =~ /WW/
|
52
|
-
polv3 = self[ww_pos + 10,4]
|
53
|
-
elsif self =~ /VW/
|
54
|
-
polv3 = self[vw_pos + 2,4]
|
55
|
-
else
|
56
|
-
error = 'WW or VW motif missing'
|
57
|
-
end
|
58
|
-
|
59
|
-
polv3 unless error
|
60
|
-
|
61
|
-
end
|
62
|
-
|
63
|
-
#The fourth position of limited variability(polv4)
|
64
|
-
def polv4
|
65
|
-
self[self.length - 12,4]
|
66
|
-
end
|
67
|
-
|
68
|
-
#Assigning dsid group based on cysteines coun and presence of
|
69
|
-
#REY motif in polv2, MFK in polv1,
|
70
|
-
def cyspolv_group
|
71
|
-
case
|
72
|
-
when cys_count > 4 || cys_count == 3 || cys_count < 2
|
73
|
-
group = 6
|
74
|
-
when cys_count == 4 && polv2 =~ /REY/i
|
75
|
-
group = 5
|
76
|
-
when cys_count == 4
|
77
|
-
group = 4
|
78
|
-
when cys_count == 2 && polv1 =~ /MFK/i
|
79
|
-
group = 1
|
80
|
-
when cys_count == 2 && polv2 =~ /REY/i
|
81
|
-
group =2
|
82
|
-
else
|
83
|
-
group = 3
|
84
|
-
end
|
85
|
-
group
|
86
|
-
end
|
87
|
-
|
88
|
-
#position specific polymorphic block 1
|
89
|
-
def pspb1(anchor_pos,win_len)
|
90
|
-
self[14 + anchor_pos,win_len]
|
91
|
-
end
|
92
|
-
|
93
|
-
#position specific polymorphic block 2
|
94
|
-
def pspb2(anchor_pos,win_len)
|
95
|
-
if self =~ /WW/
|
96
|
-
pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
97
|
-
elsif self =~ /VW/
|
98
|
-
pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
99
|
-
else
|
100
|
-
error = 'WW or VW motif missing'
|
101
|
-
end
|
102
|
-
pspb2
|
103
|
-
end
|
104
|
-
|
105
|
-
#position specific polymorphic block 3
|
106
|
-
def pspb3(anchor_pos,win_len)
|
107
|
-
if self =~ /WW/
|
108
|
-
pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
|
109
|
-
elsif self =~ /VW/
|
110
|
-
pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
|
111
|
-
else
|
112
|
-
error = 'WW or VW motif missing'
|
113
|
-
end
|
114
|
-
pspb3
|
115
|
-
end
|
116
|
-
|
117
|
-
#position specific polymorphic block 4
|
118
|
-
def pspb4(anchor_pos,win_len)
|
119
|
-
self[self.length - 12 - win_len - anchor_pos, win_len]
|
120
|
-
end
|
121
|
-
|
122
|
-
|
123
|
-
private
|
124
|
-
def accepted_length
|
125
|
-
100..168
|
126
|
-
end
|
3
|
+
def has_accepted_length?
|
4
|
+
true if accepted_length.include? self.length
|
5
|
+
end
|
127
6
|
|
128
|
-
|
7
|
+
def start_motif
|
8
|
+
self[0,5]
|
9
|
+
end
|
10
|
+
|
11
|
+
def end_motif
|
12
|
+
self[-5,self.length]
|
13
|
+
end
|
14
|
+
|
15
|
+
def ww_pos
|
16
|
+
rindex("WW")
|
17
|
+
end
|
18
|
+
|
19
|
+
def vw_pos
|
20
|
+
rindex("VW")
|
21
|
+
end
|
22
|
+
|
23
|
+
#number of cysteines
|
24
|
+
def cys_count
|
25
|
+
scan(/C/).size
|
26
|
+
end
|
27
|
+
|
28
|
+
#The first position of limited variability(polv1)
|
29
|
+
def polv1
|
30
|
+
self[10,4]
|
31
|
+
end
|
32
|
+
|
33
|
+
#The second position of limited variability(polv2)
|
34
|
+
def polv2
|
35
|
+
if !ww_missing?
|
36
|
+
return self[ww_pos - 4,4]
|
37
|
+
elsif !vw_missing?
|
38
|
+
return self[vw_pos - 12,4]
|
39
|
+
else
|
40
|
+
return '....'
|
41
|
+
end
|
42
|
+
end
|
129
43
|
|
130
|
-
#
|
131
|
-
|
44
|
+
#The third position of limited variability(polv3)
|
45
|
+
def polv3
|
46
|
+
if !ww_missing?
|
47
|
+
return self[ww_pos + 10,4]
|
48
|
+
elsif !vw_missing?
|
49
|
+
return self[vw_pos + 2,4]
|
50
|
+
else
|
51
|
+
return '....'
|
52
|
+
end
|
53
|
+
end
|
132
54
|
|
133
|
-
#
|
134
|
-
|
55
|
+
#The fourth position of limited variability(polv4)
|
56
|
+
def polv4
|
57
|
+
self[self.length - 12,4]
|
58
|
+
end
|
135
59
|
|
136
|
-
#
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
60
|
+
#Assigning dsid group based on number of cysteines, presence of REY motif in polv2 and MFK in polv1,
|
61
|
+
def cyspolv_group
|
62
|
+
case
|
63
|
+
when cys_count > 4 || cys_count == 3 || cys_count < 2
|
64
|
+
group = 6
|
65
|
+
when cys_count == 4 && polv2 =~ /REY/i
|
66
|
+
group = 5
|
67
|
+
when cys_count == 4
|
68
|
+
group = 4
|
69
|
+
when cys_count == 2 && polv1 =~ /MFK/i
|
70
|
+
group = 1
|
71
|
+
when cys_count == 2 && polv2 =~ /REY/i
|
72
|
+
group =2
|
73
|
+
else
|
74
|
+
group = 3
|
75
|
+
end
|
76
|
+
group
|
77
|
+
end
|
141
78
|
|
142
|
-
|
143
|
-
|
79
|
+
def is_var1_cp1?
|
80
|
+
return true if cyspolv_group == 1 && self =~ /NVHDKVEKGLREVF|NVHDKVETGLREVF/i
|
81
|
+
end
|
144
82
|
|
145
|
-
|
146
|
-
|
83
|
+
def is_var1_cp2?
|
84
|
+
return true if cyspolv_group == 2 && self =~ /APNKEKIKLEENLKK/i
|
85
|
+
end
|
147
86
|
|
148
|
-
|
149
|
-
|
87
|
+
def is_var1?
|
88
|
+
return true if is_var1_cp1? || is_var1_cp2?
|
89
|
+
end
|
150
90
|
|
91
|
+
def bs_group
|
92
|
+
case
|
93
|
+
when self =~ /(?:R(?:HYADHDKSGNYYK|NENNNLGKLSNEQ|I(?:RHYDDGSGNY(?:SK|YK)|THYN(?:GVSGNCVK|DGSGNYVK))|E(?:RYKDLKDVEIDD|HYKEVKNGNY(?:YK|IK)|KYKDLKD(?:VEIDD|LPIDD))|VKETYKDDPNYYK|KNNSSLRKLTNEQ)|G(?:G(?:RGRK(?:KLEDNL(?:IE|KE)|QLEENLQK)|GGRKKLEDNLKE)|I(?:N(?:D(?:Y(?:ND(?:GSGNYFK|ISGNYYK)|D(?:RDGPE(?:HYK|YYK)|GDGPEYYK))|CDRDGPEYYK)|AYNDGSENYYK)|IDYDHDGPHYYK)|P(?:SQEK(?:IKLEENLK|KKLEENLK)|KQEKKELEENLK)|K(?:NYPDDGSGN(?:YYK|FYK)|K(?:YYND(?:G(?:SGNYYK|TGNYVK)|ETGNYYK)|KEKEKIYGNIE)|QYYNDENGNYYK)|QTYPDDGSGNYYK)|MESNANLKKHTLER|S(?:STNTQCRCATNDV|HY(?:TDTHGSIDYDK|ED(?:GDKSGNYYK|KDKSGNYIK)|ADHDKSGNY(?:YK|LK))|NKEKEKIENSLQN|DYKDDD(?:GSGNYYK|IDGNYY(?:K|Q))|E(?:GKCGHKETERDL|KVEYGLRKLFKK)|VQERYGNDPNFFQ|KI(?:NDYDGDPNYYK|TDYDNDPNYYK)|QGQCGRNENNGYP|F(?:S(?:SEYCGH(?:RQGS(?:V|A)|GDNEV|EQGNV)|N(?:GQCGHRDENV|SKCGHGEHEV|DYCGH(?:RQGSV|NENKV)|PKCGH(?:G(?:DNEV|EHEV)|NENKV|EQGNV)|EYCGHRQGSV)|D(?:RKCGHYEGAP|HKCGH(?:GDKDV|YEGAP|DENAP)))|TN(?:GQCGHNEENV|PKCG(?:RGDNEV|H(?:G(?:DNEV|EHEV)|DENKV)))|ADAYCGRGDENV)|LILPYSKCGRDTD)|H(?:HYKDDDISGNYSK|SKEKEKLQTNLKN|N(?:H(?:IKKPLLENLEQ|KKKPLL(?:DNLEK|ENLEQ))|NKKKALLDNLEK|QKKINLEKSLHR)|EQG(?:YNKLEAI(?:SKT|LKT)|NNKLEA(?:RLKT|ILKT))|QQRK(?:RKLEENLRN|GKLEENLRN))|C(?:RAP(?:N(?:GANYFRKGL|EANYFKNVA)|KNAHYFIKSS|QKANYFKNVA)|G(?:TGENDTYFKNSS|A(?:G(?:EKDTYF(?:TYS(?:N|K)|VQLD)|A(?:RDEYFIKPS|KDTYFTYSK))|TMNDIFSKNIG|LPKSAY(?:VLQSE|F(?:MQLE|LQSE))))|S(?:VPYEAYYFTYKS|A(?:G(?:PKDTYFIKSG|QKDTYFIKPN)|D(?:GSE(?:DYFIKSS|EYF(?:IQSE|KKQS))|DSEDYFI(?:RSE|QSE))|P(?:RDA(?:DYFIKNS|QYFIKSS)|GD(?:VNYFRK(?:GL|IS|ES|FS)|AKYVK(?:YFP|NFP))|HNAQYVKYVP|Y(?:GANYYRKYS|HPGYFRQSK|YADYF(?:RKGS|K(?:SVA|KK(?:S|P)))|NAHYFIKSS|CADYFKKKS|DANYVRRKS|EA(?:YYFTYKS|QYFIKSS)|KSQYFIKSS)|D(?:YAKYFRQTC|NAKY(?:VKYFP|FKPPK)))|QNNEVYFINSE))|Y(?:IPY(?:YVNYFK(?:NIS|DIS|K(?:T(?:S|P)|KS))|CVNYFKNIS)|APNNANYFIGSG)|NAP(?:GD(?:VHYFRKDP|AHYFRKDP)|Y(?:DANY(?:YR(?:KYS|QTC)|VRRKS|FRKTS)|EAQY(?:YIKSS|FIK(?:SS|PS))|K(?:SRYF(?:M(?:HSE|QSE)|IQSE)|A(?:QYYIKSS|WYFMHSE)))|NISGYFMQS(?:G|E)|D(?:NVNYFRKYS|KAEYFVYKS))|TAP(?:YGANYYRKYS|D(?:N
VNYFRKYS|KANYFIYKS))|IAPRDAHYFLKSS|D(?:TEESDTYFKQSS|A(?:SYKSGYFMQSE|P(?:RDA(?:HYFLKSS|NFFIKNS|DYF(?:RKGS|KNVA))|YKSRYF(?:MQSE|IQSE)|KDANYFIGSG|QKVDYFRK(?:GS|IS))))|EA(?:SKNANFFIK(?:NS|DS)|P(?:GDAHYFRKGP|ENAYIIKRRI|KDANYFIGSG|QKVDYFRKG(?:S|L)))|VA(?:GEGNTYFIQLD|PENAYFRKTEA)|KA(?:S(?:RNA(?:HYFLKSS|NYFRK(?:IS|AL))|KNANFFIKNS)|P(?:GDVN(?:YFRKIS|FFIKNS)|NGANYFRKKS|TGA(?:HYFLKSS|DYF(?:VYKP|KKKS))|PKVDYSRNIS|EDADYFRKGS|K(?:GANYFRKES|DA(?:HYFLKSS|N(?:YFIGSG|FFIKIS)|DYFRKGS))|Q(?:GANYFRNIS|SVHYFIKTS|DANYF(?:R(?:N(?:IS|VS)|K(?:GL|IS))|TKES)|K(?:VDYFRKGS|ANYFRKGS)))|KEGDIYSKTTD)|FA(?:HNTEEYFIKSE|DGSEEYFI(?:KSS|QS(?:S|E)))|AAR(?:GNDLYSKNIG|YHPGYFKKSD))|Y(?:NERDR(?:DKKRKLQE|EKKRKLQD|AQKKKLQD)|DEKEKNRRKQLEN|KAP(?:RKA(?:NYFIYKS|DYFRNIS)|KDAHYFLKSS|QDANYFRNVS))|N(?:RKEKGKLQTNLKN|GDYK(?:EKVSNNL(?:RA|KT)|KKVSNNLKT)|HYKDD(?:NGS(?:GNYYK|ENYYK)|D(?:GSGNYYK|ISGNY(?:SK|YK)))|SDDKVE(?:NGLKKVF|KGLREVF)|Y(?:Y(?:NNTGNN(?:V(?:NYAK|DYVK)|ANYAK)|ADGDKSGNYYK)|NYDEDGSGNY(?:YK|VK))|N(?:HDNVE(?:NGL(?:REVF|KAVF)|KGLK(?:KVF|AVF))|D(?:ND(?:RVKKEKLQN|K(?:IKK(?:GKLRG|EKL(?:RG|QE))|VKKEKL(?:RG|QN)))|DDKIKKGKLRG|V(?:EKGL(?:DVVFKK|KVVFKK)|VKGLDVVFKK))|E(?:SE(?:IKRKEKLRG|KKKREELQG)|TDKEQKVKLEK|KDM(?:REKQKLQS|TEKQKLQS))|VDAVQEGLKVVF|KE(?:NEKLQENLKR|KEKIEKSLQN))|TVDK(?:IHEGLKVVF|VHEGLKVVF)|D(?:NVE(?:NGLREVFKK|KGLK(?:KVFDK|AVFRK))|DVEKGLKIVFEK|EDDVEKGLKIVF|K(?:D(?:YVENGLKKVF|A(?:V(?:RHGLKVVF|QKGLRAVF)|AQKVLRTVF))|EKDQRKKLDE(?:N|I)|VE(?:NGL(?:REVFKK|KKVFDK)|KGL(?:REVF(?:RK|KK)|QVVFGK)))|QD(?:DVEKGLKIVF|EVWNGLRSVF)|ADKV(?:EKGLQVVF|QKGLQVVF))|P(?:E(?:DKVHEGLKVVF|VE(?:NGLREVFNK|KGLKAVFRK))|QDKVQ(?:EGLK(?:NVF|VVF)|KGLREVF))|E(?:MVEIGLKKVFKK|HYKEVKNGNYVK|NVEKGL(?:K(?:IVFEK|KVFDK)|QVVFGK)|D(?:DKVQKGLQVVF|VEKGLKVVF(?:KK|QK))|EDAVQKGL(?:RAVF|K(?:VVF|KVF))|K(?:DAVQNGLKKVF|VE(?:YGLRKLFKK|IGLKKVF(?:DK|EK|KK)))|QDEVWKGLRDVF)|VH(?:YK(?:DDGS(?:GNYYK|ENYYK)|EVKNGNYVK)|DKVE(?:RGLREVF|TGLREVF|KGL(?:REVF|QVV(?:F|L))))|K(?:HDNIEKGLREVF|N(?:NVPL(?:HNLSLDK|DKLSLDK)|K(?:SPLDKLSLEQ|PPLDKLSVDK)|VEIGLK(?:NVFKN|KVFDK))|DDK(?:IEKSLRAIF|V(
?:EKGLRAIF|QKGL(?:RAVF|KAVF|QVVF)))|Q(?:RKKILQEKLEN|EKEKREKLDEN))|FSNPKCGH(?:DEGIV|KQGNV)|QEDKVQEGLKVVF|LILTHPKCGHDTD)|I(?:SYYNADEKGNFYK|HNYDDNGSGNYYK|E(?:TRY(?:GSDTTNYYQ|ENDGPNYYQ)|ARYKKDDDNYYQ)|VSFDQCGHND(?:MDV|VDV)|KNDKTLNNLSNGQ|FSNEHCGHKQGSV)|T(?:SEGQCGHNDKMRP|HYADEDGS(?:GNYVK|ENYYK)|D(?:N(?:D(?:EVW(?:TGLRSVF|KGL(?:RSVF|GSVF))|AVQKGLRAVF)|VE(?:NGLREVFKK|KGLRAVFGK))|K(?:D(?:YVENGLK(?:KVF|AVF)|DVENGLREVF|EV(?:KEGLKVVF|WKGLRAVF)|AVQKGLRAVF)|VENGLK(?:EVFDK|KVFD(?:N|K))))|E(?:GYCGRNENNGYP|TLYKDEEGNYLK|KDDVEKGLKIVF)|V(?:S(?:SNKCGHNDMDV|NAKRREGDENP|FDQCGHNDM(?:HV|DV))|K(?:GTYKDDPYYYK|ETYKDDPNYYK))|F(?:S(?:GYWCGHYEGAP|NDYCGHGEHEV)|TYTKCGHDENKV))|D(?:SRTDKLEENLRKI|NNSDKLRDLSVDK|DDVEKGLKIVFEK|K(?:GEKKKLEKNLKD|NRGKLGALSLDD))|P(?:SY(?:IKCGHNNKDDP|LKCGHNNKDDP)|HYTNDRGLADYVK)|E(?:HY(?:EDVDGSGNYLK|KDVDGSGNYYK)|YY(?:NDTNNKINYVK|EDKDPDKNYYQ)|KNY(?:YNDGTGNYYK|PDDGSGNYSK)|FT(?:GGYCGRDETDV|SGYCGRNETNV))|V(?:NGNDKLESNLKKI|KAHY(?:KKDAPYYYK|QKDAPNYYK)|FSNRQCGHYED(?:VP|AP))|K(?:RYYNDDTD(?:NN(?:FYQ|LYQ)|DNFYQ)|GINDYDGDPNYYK|SYY(?:NAD(?:GEGNFYK|EKGNYYK)|DADEKGNYYK)|HY(?:TDTHGSIDYDK|A(?:H(?:GDGSGNY(?:SK|YK|LK)|DDGS(?:GNYYK|VNYYK))|DEDGSGNYYK))|YYNDTNNKINYVK|N(?:Y(?:YNPD(?:G(?:SGNYYK|AGNYYK)|EAGNYYK)|NYD(?:EDGPEYYK|KDGPEYYK))|NDRTLNNLSIGQ|DYNPDGSGNY(?:YK|FK)|E(?:SE(?:IKRKEKLQR|K(?:RTKEKLQG|NTKKKLQG))|NTDLNKLTTEK)|KNTKLSTLTLEK|AYPDDG(?:SGNY(?:YK|FK)|FGNYYK))|T(?:SN(?:SN(?:MDTLSLEQ|LKELSLDK)|TNMNTLSLDK)|IYADLKDVEIDD)|I(?:NDYDGDGPEYYK|THYDDISGNYYK|KDYDGDGPEYYK)|D(?:HY(?:KDEKDGNFFQ|QDDGTGNYYK)|YYNADEKGNYYK)|PHYKDDGFGNYYK|E(?:YYQDDGTGNYYK|ISDYDNDPNYYK|EYGDLKDVPIDD|KYGDLKDVPIDD)|V(?:HYKENKDGNY(?:YK|VK)|KY(?:PDL(?:ND(?:IEIDD|VEIDD)|KDLQIDD)|QDLKDVEIDD))|K(?:HY(?:ENDTDKNYYQ|KKDEDPNYYK)|VYPEDVTGNY(?:YK|FK)|KKKGLSELSTEK)|FS(?:SDRCGHNEGDP|ERKCGH(?:NEGSP|DENAP))|Q(?:HYK(?:DDGSVNYYK|EDKDENYYK)|NNKKLKDLTDKH)|A(?:RYKDRKDPNYYK|K(?:YEDLKTLPIDD|ERYKDIKNYYQ)))|F(?:S(?:S(?:SGPCGRDEAPV|HGKCGHNEGAP|DRCGHNNNDGP|E(?:GKCGHKEGTV|YCGHYKNGDP)|QGQCGHTEGTV)|N(?:RGPCGRNETDV|S(?:G(?:TRGRKELTV|PCGRKELTV|KCGGKEAPV)|KCGHHNN
DGP)|N(?:GPCGRNETDV|KCGHSNGGDP)|PKCGHSNGGDP|E(?:HCGH(?:HNNDDP|YKNGDP)|YCGH(?:YKNGDP|KKNEDP)))|DNG(?:HCGRNETNV|PCGRKELIV))|T(?:RQGYCGHSETNV|GGGQC(?:RRNDNSV|GRNETDV)|S(?:HGKCG(?:RNETNV|HSEGAP)|IGKCGHNKGSV|EG(?:RCGHSETNV|KCG(?:RNETNV|HNDNRV)|Q(?:RGHSETNV|C(?:RRNDNSV|GH(?:SETNV|NDKSV|DENKV))))|VGYCGHNKG(?:SV|IR)|QG(?:YCG(?:RKE(?:LTV|APV)|HSETNV)|QCG(?:RNERNV|H(?:SETNV|TEGTV|KEGTV)))|AGKCR(?:RNDNSV|HNDNSV))|NDGKCG(?:RYEGAP|H(?:YE(?:GAP|NNI|D(?:NV|AP))|TEGTV))|TEGYCGR(?:NEGAP|DEGAP)|D(?:GHCGRTQEGHV|IGKCG(?:GKEAPV|H(?:GDKDV|N(?:EGAP|KGSV)|KQGNV))|DGKCGHYEGAP))|ENAGKC(?:RRNDNKV|GHNDNRV)|WDRKCGHSN(?:GGDP|EGA(?:P|L))|LYPKCGHNNKNDL)|Q(?:GIIDYDNDPNYYK|NYY(?:KDDPKKNYYK|ADDGSGNY(?:SK|VK))|ISDY(?:TGDHPNYYK|DGDGPEYYK)|K(?:SDSSLQRLSIEK|HYEDDGSGNYYK|NNSALKKLTDKQ|IY(?:EDINNLPIDD|KDLNNLPIDD))|QNNNTLENLTDKQ)|L(?:K(?:TRYKKDDDNYYQ|KHYQKDAPNYYK)|F(?:S(?:N(?:RQCGH(?:GEHEV|NEGAP|DENKV|EQGNV)|SKCGH(?:RQGNV|DE(?:SKV|NKV)|EQGNV)|YKCGHYEDAP|DYCGHKQGNV|PKCGH(?:EQG(?:NV|TV)|KQGKV)|AYCGHYEGSP)|D(?:GHCGNKDGTV|HKCGHEESRV|YKCGHYE(?:GSP|DAP)))|DYNCGHHKDNNV|WDRKCGHDERNV)|Q(?:TRYTNDGDNYFK|ERYNDPKGDFFQ|ARYKKDGDDFFK)|WNDKCGHHVDKDV|LFSNYKCGHYEGS)|A(?:RYKKDEEDGNYY(?:K|Q)|VSSNKCGHNDMNV|KNDYTGDHPNYYK|LKHYKDDTKNYYQ))/
|
94
|
+
block_sharing = 1
|
95
|
+
when self =~ /(?:RE(?:PGKQHLEERLER|KGKSRLEARLKT)|G(?:G(?:HYKNCHCIGGDV|TYKNCRCASGNV)|P(?:NQEKKLLENKLK|DQEKKKLEENLR)|ANAIKAGDNVSIV)|S(?:FTNGQCGRDGENV|WYPKCGHHVKQDV|AKEHYQDTENYYK)|H(?:NRRKEKLETRLEE|E(?:PG(?:IQ(?:HLEKRLES|YLEKRLES)|KQHL(?:GERLEQ|EERLE(?:R|Q)))|QG(?:YNRLEARLKT|NNRLEARLKT|INRLEARLKT))|QQRKHLLEKRLET)|C(?:RAEEK(?:GTYFKNRE|D(?:TYFKNRE|IYSKTTD)|EIYSKTTD)|G(?:TEDKDTYFIKSG|VEENAKYFRESS|A(?:GMKDIYSKTMN|SEDAKYKVIGP|T(?:MNDIFSKNIR|VDDI(?:SSKNIR|FSKNIR))|P(?:SDAQYFRNTC|KEAKYFRKTA)))|HAPPDAQYTKKGP|NA(?:P(?:TGADYFVYKP|KDANYFEYNS)|WGNTYFRKTCS)|DAG(?:QKDTYFKQSS|AADEYFKKSG)|EA(?:GTSDKYFRKTA|KSDDKYNVIGP)|KA(?:NDDAEYFRKKD|P(?:DKANYF(?:EPPK|KPPK)|EEDHYFKPAQ)|EVDDIYSKTAN|K(?:EGDIYSKT(?:MN|AN)|KG(?:GIYSKTMN|DIYSKTMN))))|Y(?:SQKYKDEKSKLEE|NE(?:TDKVQKAILQQ|KDQEEKRKLQE))|N(?:YYEDNDTDKNYYQ|NNAAKLSELSTAQ|THESAQRKKLEEN|DEEKKKRDELEKN|KN(?:NPPLYKLSLEK|KSPLDKLSLDK))|T(?:RYKKDDEDGNFFQ|PTQGKCHCIDGT(?:N|V)|L(?:FDYKCGHDENAP|WNEKCGHGDYNL))|I(?:EHYKDDPEENFYE|KS(?:NYNDSEGNYFK|QYDDNEGNYFK)|LFDYKCAHDNDKV)|D(?:RKEKVKLEENLKN|KGEKKKLEENLKN|Q(?:ERK(?:HLLEKRLET|QHLEKRLET)|QEK(?:LYLENNLKK|AKLENNLKR))|AKKHYGDDENYYK)|P(?:NKCRCEDANADQV|CSVQKCTCINGDP)|E(?:SN(?:M(?:GQCRCFSGDP|VQCRCFSGDP)|KGQCRCFSGDP)|THGYCRCVNRVDV|F(?:SGGKCGHKDNNV|T(?:GGQCGRDGENV|DGHCGH(?:RQGNV|NEENV))))|VKDRYQNDGPDFFK|K(?:S(?:SYNDDGTGNYFK|YYKNDNDRNYFK)|N(?:HYNDTSKNYYK|NNN(?:ELNNLSLDK|KLSNLSTKE))|D(?:HYKGDEANNYFQ|NNTKLNDLSIQE)|K(?:TNPALKSFTNEE|KLEENLRNIFKN)|FSNPKCGHNEGSP)|F(?:SNDQCGHNN(?:RGDP|GGAP)|WYPKC(?:GHHV(?:RQDV|KQ(?:DV|EV))|SHHVKQDV))|Q(?:RNNIKLQ(?:NIPLHE|TLTLHQ)|N(?:NNTKLQNIPLHE|KNENLKSLSLDK)|KENGDINTLKPEE)|L(?:FYYKCGHYVYKDV|WN(?:Y(?:NCGHHVN(?:RDV|QDV)|KCGHHVNQDV)|DKCGHHVKQDV))|ARDHYNDTSGNYYQ)/
|
96
|
+
block_sharing = 2
|
97
|
+
else
|
98
|
+
block_sharing = 0
|
99
|
+
end
|
100
|
+
block_sharing
|
101
|
+
end
|
151
102
|
|
152
|
-
#
|
153
|
-
|
103
|
+
#distinct sequence identifier (DSID)
|
104
|
+
def dsid
|
105
|
+
"#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
|
106
|
+
end
|
154
107
|
|
155
|
-
#
|
156
|
-
|
108
|
+
#position specific polymorphic block 1
|
109
|
+
def pspb1(anchor_pos=0,win_len=14)
|
110
|
+
self[14 + anchor_pos,win_len]
|
111
|
+
end
|
157
112
|
|
158
|
-
#
|
159
|
-
|
113
|
+
#position specific polymorphic block 2
|
114
|
+
def pspb2(anchor_pos=0,win_len=14)
|
115
|
+
if !ww_missing?
|
116
|
+
return self[ww_pos - 4 - anchor_pos - win_len, win_len]
|
117
|
+
elsif !vw_missing?
|
118
|
+
return self[vw_pos - 12 - win_len - anchor_pos, win_len]
|
119
|
+
else
|
120
|
+
return '....'
|
121
|
+
end
|
122
|
+
end
|
160
123
|
|
161
|
-
#
|
162
|
-
|
124
|
+
#position specific polymorphic block 3
|
125
|
+
def pspb3(anchor_pos=0,win_len=14)
|
126
|
+
if !ww_missing?
|
127
|
+
return self[ww_pos + 14 + anchor_pos, win_len]
|
128
|
+
elsif !vw_missing?
|
129
|
+
return self[vw_pos + 6 + anchor_pos, win_len]
|
130
|
+
else
|
131
|
+
return '....'
|
132
|
+
end
|
133
|
+
end
|
163
134
|
|
164
|
-
#
|
165
|
-
|
135
|
+
#position specific polymorphic block 4
|
136
|
+
def pspb4(anchor_pos=0,win_len=14)
|
137
|
+
self[self.length - 12 - win_len - anchor_pos, win_len]
|
138
|
+
end
|
166
139
|
|
167
|
-
|
168
|
-
|
140
|
+
private
|
141
|
+
def accepted_length
|
142
|
+
100..168
|
143
|
+
end
|
169
144
|
|
145
|
+
def ww_missing?
|
146
|
+
true unless self =~ /WW/i
|
147
|
+
end
|
170
148
|
|
171
|
-
|
172
|
-
|
149
|
+
def vw_missing?
|
150
|
+
true unless self =~ /VW/i
|
151
|
+
end
|
173
152
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
#puts tag.end_motif
|
179
|
-
#puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
180
|
-
#end
|
153
|
+
def vw_ww_missing?
|
154
|
+
true if ww_missing? && vw_missing?
|
155
|
+
end
|
156
|
+
end
|
data/spec/aa_spec.rb
CHANGED
@@ -1,32 +1,54 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "Dbla" do
|
4
|
-
context '
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
it "should return the number of cysteines" do
|
11
|
-
@tag.cys_count.should == 4
|
12
|
-
end
|
4
|
+
context 'Group4 Dbla tag' do
|
5
|
+
before(:each) do
|
6
|
+
seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
|
7
|
+
@tag = Bio::Sequence::AA.new(seq)
|
8
|
+
end
|
13
9
|
|
14
|
-
|
15
|
-
|
16
|
-
|
10
|
+
it "should return the number of cysteines" do
|
11
|
+
@tag.cys_count.should == 4
|
12
|
+
end
|
17
13
|
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
it 'should return a dsid' do
|
15
|
+
@tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
|
16
|
+
end
|
21
17
|
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
it 'should return the cyspolv group' do
|
19
|
+
@tag.cyspolv_group.should == 4
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should return the length' do
|
23
|
+
@tag.length.should == 126
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should return the start motif' do
|
27
|
+
@tag.start_motif.should == 'YIGDI'
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should return false for var1' do
|
31
|
+
@tag.is_var1?.should be_false
|
32
|
+
end
|
25
33
|
|
26
|
-
it 'should return the start motif' do
|
27
|
-
@tag.start_motif == 'YIGDI'
|
28
34
|
end
|
29
35
|
|
30
|
-
|
31
|
-
|
36
|
+
context 'Group2 Dbla tag' do
|
37
|
+
before(:each) do
|
38
|
+
seq = 'DIGDIVRGTDLFLGGPSQEKKKLEENLKKILENIKNKNTKLSTLTLEKVREYWWALNRNDVWKALTCSAPYEAQYFIKSSDKEHSFSSEYCGHHNNDDPLTNLDYVPQFLR'
|
39
|
+
@tag2 = Bio::Sequence::AA.new(seq)
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should return the number of cysteines' do
|
43
|
+
@tag2.cys_count.should == 2
|
44
|
+
end
|
32
45
|
|
46
|
+
it 'should return the block sharing group 1' do
|
47
|
+
@tag2.bs_group.should == 1
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'should return false for cp2 var1' do
|
51
|
+
@tag2.is_var1_cp2?.should be_false
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-dbla-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-02-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152850760 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152850760
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2152849320 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,21 +32,21 @@ dependencies:
|
|
32
32
|
version: 2.3.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152849320
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2152848240 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 1.
|
43
|
+
version: 1.1.rc.7
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152848240
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &2152846960 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.4
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152846960
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rcov
|
60
|
-
requirement: &
|
60
|
+
requirement: &2152855080 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2152855080
|
69
69
|
description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
|
70
70
|
email: georgkam@gmail.com
|
71
71
|
executables: []
|
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
103
103
|
version: '0'
|
104
104
|
segments:
|
105
105
|
- 0
|
106
|
-
hash:
|
106
|
+
hash: 3330781070760048632
|
107
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
108
|
none: false
|
109
109
|
requirements:
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
114
|
rubyforge_project:
|
115
|
-
rubygems_version: 1.8.
|
115
|
+
rubygems_version: 1.8.12
|
116
116
|
signing_key:
|
117
117
|
specification_version: 3
|
118
118
|
summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags
|