bio-dbla-classifier 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ source "http://rubygems.org"
5
5
  # Needed to run rake, tests, features
6
6
  group :development do
7
7
  gem "rspec", "~> 2.3.0"
8
- gem "bundler", "~> 1.0.0"
8
+ gem "bundler", "~> 1.1.rc.7"
9
9
  gem "jeweler", "~> 1.6.4"
10
10
  gem "rcov", ">= 0"
11
11
  end
@@ -24,7 +24,7 @@ PLATFORMS
24
24
 
25
25
  DEPENDENCIES
26
26
  bio (>= 1.4.2)
27
- bundler (~> 1.0.0)
27
+ bundler (~> 1.1.rc.7)
28
28
  jeweler (~> 1.6.4)
29
29
  rcov
30
30
  rspec (~> 2.3.0)
@@ -6,9 +6,8 @@ If you use this plugin please quote,
6
6
  Bull et al “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes..” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
7
7
 
8
8
  = Installation
9
- You need to have Ruby installed on your system. This plugin has been tested on Ruby 1.9.2-p290. See http://rubylang.info/ for
10
- information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed type the following command in the terminal to
11
- install the gem. This will install the bioruby gem if it is not already installed on your system.
9
+ Ruby must be installed on your system. See http://rubylang.info/ for information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed type the following command in the terminal to
10
+ install the gem. This will install the bioruby gem if it is not already installed on your system. The plugin has been tested on Ruby 1.9.2-p290.
12
11
 
13
12
  gem install bio-dbla-classifier
14
13
 
@@ -41,34 +40,36 @@ install the gem. This will install the bioruby gem if it is not already installe
41
40
 
42
41
 
43
42
  #get the block sharing group for this tag
44
- #puts dbl_seq.bs_group #to be implemented
43
+ puts dbl_seq.bs_group #=> 1
45
44
 
46
45
  #get the length of the tag
47
46
  puts dbl_seq.size #=> 115
48
47
 
48
+ dbl_seq.is_var1? #=> false
49
+
49
50
  = Finding the Position Specific Polymorphic Blocks(PSPB)
50
51
 
51
- The pspb methods take 2 arguments, an anchor position and a window length that defines the length of the pspb
52
+ The pspb methods take 2 arguments, an anchor position and a window length that
53
+ defines the length of the pspb. The default anchor position is 0 and the default
54
+ window length is 14.
52
55
 
53
56
  #get pspb1
54
- puts seq.pspb1(0,14) #=> NPEVEKGLKAVFRK
57
+ puts seq.pspb1 #=> NPEVEKGLKAVFRK
55
58
 
56
59
  #get pspb2
57
- puts seq.pspb2(0,14) #=> THYADEDGSGNYVK
60
+ puts seq.pspb2 #=> THYADEDGSGNYVK
58
61
 
59
62
  #get pspb3
60
- puts seq.pspb3(0,14) #=> CKAPQSVHYFIKTS
63
+ puts seq.pspb3 #=> CKAPQSVHYFIKTS
61
64
 
62
65
  #get pspb4
63
- puts seq.pspb4(0,14) #=> FTSHGKCGRNETNV
64
-
65
- = Processing fasta files
66
+ puts seq.pspb4 #=> FTSHGKCGRNETNV
66
67
 
67
- If the input is a fasta file,
68
+ = Processing a flatfile, for example fasta, genbank or embl
68
69
 
69
70
  seq_file = "sequences.fasta"
70
71
 
71
- #Process each entry in the file
72
+ #for each entry in the file
72
73
  Bio::FlatFile.open(seq_file).each do |entry|
73
74
  tag = Bio::Sequence::AA.new(entry.seq)
74
75
  puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
@@ -76,5 +77,4 @@ If the input is a fasta file,
76
77
 
77
78
  = Copyright
78
79
 
79
- See LICENSE.txt for further details.
80
-
80
+ See LICENSE.txt for further details
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-dbla-classifier"
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["George Githinji"]
12
- s.date = "2011-10-13"
12
+ s.date = "2012-02-20"
13
13
  s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
14
14
  s.email = "georgkam@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -35,7 +35,7 @@ Gem::Specification.new do |s|
35
35
  s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
36
36
  s.licenses = ["Ruby"]
37
37
  s.require_paths = ["lib"]
38
- s.rubygems_version = "1.8.10"
38
+ s.rubygems_version = "1.8.12"
39
39
  s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
40
40
 
41
41
  if s.respond_to? :specification_version then
@@ -44,20 +44,20 @@ Gem::Specification.new do |s|
44
44
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
45
  s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
46
46
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
47
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<bundler>, ["~> 1.1.rc.7"])
48
48
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
49
49
  s.add_development_dependency(%q<rcov>, [">= 0"])
50
50
  else
51
51
  s.add_dependency(%q<bio>, [">= 1.4.2"])
52
52
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
53
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
53
+ s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
54
54
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
55
55
  s.add_dependency(%q<rcov>, [">= 0"])
56
56
  end
57
57
  else
58
58
  s.add_dependency(%q<bio>, [">= 1.4.2"])
59
59
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
60
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
61
61
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
62
62
  s.add_dependency(%q<rcov>, [">= 0"])
63
63
  end
@@ -1,180 +1,156 @@
1
1
  class Bio::Sequence::AA
2
2
 
3
- def has_accepted_length?
4
- true if accepted_length.include? self.length
5
- end
6
-
7
- def start_motif
8
- self[0,5]
9
- end
10
-
11
- def end_motif
12
- self[-5,self.length]
13
- end
14
-
15
- def dsid
16
- "#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
17
- end
18
-
19
- def ww_pos
20
- rindex("WW")
21
- end
22
-
23
- def vw_pos
24
- rindex("VW")
25
- end
26
-
27
- #number of cysteines
28
- def cys_count
29
- scan(/C/).size
30
- end
31
-
32
- #The first position of limited variability(polv1)
33
- def polv1
34
- self[10,4]
35
- end
36
-
37
- #The second position of limited variability(polv2)
38
- def polv2
39
- if self =~ /WW/
40
- polv2 = self[ww_pos - 4,4]
41
- elsif self =~ /VW/
42
- polv2 = self[vw_pos - 12,4]
43
- else
44
- error = 'WW or VW motif missing'
45
- end
46
- polv2 unless error
47
- end
48
-
49
- #The third position of limited variability(polv3)
50
- def polv3
51
- if self =~ /WW/
52
- polv3 = self[ww_pos + 10,4]
53
- elsif self =~ /VW/
54
- polv3 = self[vw_pos + 2,4]
55
- else
56
- error = 'WW or VW motif missing'
57
- end
58
-
59
- polv3 unless error
60
-
61
- end
62
-
63
- #The fourth position of limited variability(polv4)
64
- def polv4
65
- self[self.length - 12,4]
66
- end
67
-
68
- #Assigning dsid group based on cysteines coun and presence of
69
- #REY motif in polv2, MFK in polv1,
70
- def cyspolv_group
71
- case
72
- when cys_count > 4 || cys_count == 3 || cys_count < 2
73
- group = 6
74
- when cys_count == 4 && polv2 =~ /REY/i
75
- group = 5
76
- when cys_count == 4
77
- group = 4
78
- when cys_count == 2 && polv1 =~ /MFK/i
79
- group = 1
80
- when cys_count == 2 && polv2 =~ /REY/i
81
- group =2
82
- else
83
- group = 3
84
- end
85
- group
86
- end
87
-
88
- #position specific polymorphic block 1
89
- def pspb1(anchor_pos,win_len)
90
- self[14 + anchor_pos,win_len]
91
- end
92
-
93
- #position specific polymorphic block 2
94
- def pspb2(anchor_pos,win_len)
95
- if self =~ /WW/
96
- pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
97
- elsif self =~ /VW/
98
- pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
99
- else
100
- error = 'WW or VW motif missing'
101
- end
102
- pspb2
103
- end
104
-
105
- #position specific polymorphic block 3
106
- def pspb3(anchor_pos,win_len)
107
- if self =~ /WW/
108
- pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
109
- elsif self =~ /VW/
110
- pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
111
- else
112
- error = 'WW or VW motif missing'
113
- end
114
- pspb3
115
- end
116
-
117
- #position specific polymorphic block 4
118
- def pspb4(anchor_pos,win_len)
119
- self[self.length - 12 - win_len - anchor_pos, win_len]
120
- end
121
-
122
-
123
- private
124
- def accepted_length
125
- 100..168
126
- end
3
+ def has_accepted_length?
4
+ true if accepted_length.include? self.length
5
+ end
127
6
 
128
- end
7
+ def start_motif
8
+ self[0,5]
9
+ end
10
+
11
+ def end_motif
12
+ self[-5,self.length]
13
+ end
14
+
15
+ def ww_pos
16
+ rindex("WW")
17
+ end
18
+
19
+ def vw_pos
20
+ rindex("VW")
21
+ end
22
+
23
+ #number of cysteines
24
+ def cys_count
25
+ scan(/C/).size
26
+ end
27
+
28
+ #The first position of limited variability(polv1)
29
+ def polv1
30
+ self[10,4]
31
+ end
32
+
33
+ #The second position of limited variability(polv2)
34
+ def polv2
35
+ if !ww_missing?
36
+ return self[ww_pos - 4,4]
37
+ elsif !vw_missing?
38
+ return self[vw_pos - 12,4]
39
+ else
40
+ return '....'
41
+ end
42
+ end
129
43
 
130
- #create an instace of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
131
- #to classify and describe Dbla properties
44
+ #The third position of limited variability(polv3)
45
+ def polv3
46
+ if !ww_missing?
47
+ return self[ww_pos + 10,4]
48
+ elsif !vw_missing?
49
+ return self[vw_pos + 2,4]
50
+ else
51
+ return '....'
52
+ end
53
+ end
132
54
 
133
- #seq1 ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
134
- #seq = Bio::Sequence::AA.new(seq1)
55
+ #The fourth position of limited variability(polv4)
56
+ def polv4
57
+ self[self.length - 12,4]
58
+ end
135
59
 
136
- #get the positions of limited variability
137
- #puts seq.polv1
138
- #puts seq.polv2
139
- #puts seq.polv3
140
- #puts seq.polv4
60
+ #Assigning dsid group based on number of cysteines, presence of REY motif in polv2 and MFK in polv1,
61
+ def cyspolv_group
62
+ case
63
+ when cys_count > 4 || cys_count == 3 || cys_count < 2
64
+ group = 6
65
+ when cys_count == 4 && polv2 =~ /REY/i
66
+ group = 5
67
+ when cys_count == 4
68
+ group = 4
69
+ when cys_count == 2 && polv1 =~ /MFK/i
70
+ group = 1
71
+ when cys_count == 2 && polv2 =~ /REY/i
72
+ group =2
73
+ else
74
+ group = 3
75
+ end
76
+ group
77
+ end
141
78
 
142
- #get the number if cysteines in the tag
143
- #puts seq.cys_count
79
+ def is_var1_cp1?
80
+ return true if cyspolv_group == 1 && self =~ /NVHDKVEKGLREVF|NVHDKVETGLREVF/i
81
+ end
144
82
 
145
- #get the distinct sequence identifier
146
- #puts seq.dsid
83
+ def is_var1_cp2?
84
+ return true if cyspolv_group == 2 && self =~ /APNKEKIKLEENLKK/i
85
+ end
147
86
 
148
- #get the cyspolv group for this tag
149
- #puts seq.cyspolv_group
87
+ def is_var1?
88
+ return true if is_var1_cp1? || is_var1_cp2?
89
+ end
150
90
 
91
+ def bs_group
92
+ case
93
+ when self =~ /(?:R(?:HYADHDKSGNYYK|NENNNLGKLSNEQ|I(?:RHYDDGSGNY(?:SK|YK)|THYN(?:GVSGNCVK|DGSGNYVK))|E(?:RYKDLKDVEIDD|HYKEVKNGNY(?:YK|IK)|KYKDLKD(?:VEIDD|LPIDD))|VKETYKDDPNYYK|KNNSSLRKLTNEQ)|G(?:G(?:RGRK(?:KLEDNL(?:IE|KE)|QLEENLQK)|GGRKKLEDNLKE)|I(?:N(?:D(?:Y(?:ND(?:GSGNYFK|ISGNYYK)|D(?:RDGPE(?:HYK|YYK)|GDGPEYYK))|CDRDGPEYYK)|AYNDGSENYYK)|IDYDHDGPHYYK)|P(?:SQEK(?:IKLEENLK|KKLEENLK)|KQEKKELEENLK)|K(?:NYPDDGSGN(?:YYK|FYK)|K(?:YYND(?:G(?:SGNYYK|TGNYVK)|ETGNYYK)|KEKEKIYGNIE)|QYYNDENGNYYK)|QTYPDDGSGNYYK)|MESNANLKKHTLER|S(?:STNTQCRCATNDV|HY(?:TDTHGSIDYDK|ED(?:GDKSGNYYK|KDKSGNYIK)|ADHDKSGNY(?:YK|LK))|NKEKEKIENSLQN|DYKDDD(?:GSGNYYK|IDGNYY(?:K|Q))|E(?:GKCGHKETERDL|KVEYGLRKLFKK)|VQERYGNDPNFFQ|KI(?:NDYDGDPNYYK|TDYDNDPNYYK)|QGQCGRNENNGYP|F(?:S(?:SEYCGH(?:RQGS(?:V|A)|GDNEV|EQGNV)|N(?:GQCGHRDENV|SKCGHGEHEV|DYCGH(?:RQGSV|NENKV)|PKCGH(?:G(?:DNEV|EHEV)|NENKV|EQGNV)|EYCGHRQGSV)|D(?:RKCGHYEGAP|HKCGH(?:GDKDV|YEGAP|DENAP)))|TN(?:GQCGHNEENV|PKCG(?:RGDNEV|H(?:G(?:DNEV|EHEV)|DENKV)))|ADAYCGRGDENV)|LILPYSKCGRDTD)|H(?:HYKDDDISGNYSK|SKEKEKLQTNLKN|N(?:H(?:IKKPLLENLEQ|KKKPLL(?:DNLEK|ENLEQ))|NKKKALLDNLEK|QKKINLEKSLHR)|EQG(?:YNKLEAI(?:SKT|LKT)|NNKLEA(?:RLKT|ILKT))|QQRK(?:RKLEENLRN|GKLEENLRN))|C(?:RAP(?:N(?:GANYFRKGL|EANYFKNVA)|KNAHYFIKSS|QKANYFKNVA)|G(?:TGENDTYFKNSS|A(?:G(?:EKDTYF(?:TYS(?:N|K)|VQLD)|A(?:RDEYFIKPS|KDTYFTYSK))|TMNDIFSKNIG|LPKSAY(?:VLQSE|F(?:MQLE|LQSE))))|S(?:VPYEAYYFTYKS|A(?:G(?:PKDTYFIKSG|QKDTYFIKPN)|D(?:GSE(?:DYFIKSS|EYF(?:IQSE|KKQS))|DSEDYFI(?:RSE|QSE))|P(?:RDA(?:DYFIKNS|QYFIKSS)|GD(?:VNYFRK(?:GL|IS|ES|FS)|AKYVK(?:YFP|NFP))|HNAQYVKYVP|Y(?:GANYYRKYS|HPGYFRQSK|YADYF(?:RKGS|K(?:SVA|KK(?:S|P)))|NAHYFIKSS|CADYFKKKS|DANYVRRKS|EA(?:YYFTYKS|QYFIKSS)|KSQYFIKSS)|D(?:YAKYFRQTC|NAKY(?:VKYFP|FKPPK)))|QNNEVYFINSE))|Y(?:IPY(?:YVNYFK(?:NIS|DIS|K(?:T(?:S|P)|KS))|CVNYFKNIS)|APNNANYFIGSG)|NAP(?:GD(?:VHYFRKDP|AHYFRKDP)|Y(?:DANY(?:YR(?:KYS|QTC)|VRRKS|FRKTS)|EAQY(?:YIKSS|FIK(?:SS|PS))|K(?:SRYF(?:M(?:HSE|QSE)|IQSE)|A(?:QYYIKSS|WYFMHSE)))|NISGYFMQS(?:G|E)|D(?:NVNYFRKYS|KAEYFVYKS))|TAP(?:YGANYYRKYS|D(?
:NVNYFRKYS|KANYFIYKS))|IAPRDAHYFLKSS|D(?:TEESDTYFKQSS|A(?:SYKSGYFMQSE|P(?:RDA(?:HYFLKSS|NFFIKNS|DYF(?:RKGS|KNVA))|YKSRYF(?:MQSE|IQSE)|KDANYFIGSG|QKVDYFRK(?:GS|IS))))|EA(?:SKNANFFIK(?:NS|DS)|P(?:GDAHYFRKGP|ENAYIIKRRI|KDANYFIGSG|QKVDYFRKG(?:S|L)))|VA(?:GEGNTYFIQLD|PENAYFRKTEA)|KA(?:S(?:RNA(?:HYFLKSS|NYFRK(?:IS|AL))|KNANFFIKNS)|P(?:GDVN(?:YFRKIS|FFIKNS)|NGANYFRKKS|TGA(?:HYFLKSS|DYF(?:VYKP|KKKS))|PKVDYSRNIS|EDADYFRKGS|K(?:GANYFRKES|DA(?:HYFLKSS|N(?:YFIGSG|FFIKIS)|DYFRKGS))|Q(?:GANYFRNIS|SVHYFIKTS|DANYF(?:R(?:N(?:IS|VS)|K(?:GL|IS))|TKES)|K(?:VDYFRKGS|ANYFRKGS)))|KEGDIYSKTTD)|FA(?:HNTEEYFIKSE|DGSEEYFI(?:KSS|QS(?:S|E)))|AAR(?:GNDLYSKNIG|YHPGYFKKSD))|Y(?:NERDR(?:DKKRKLQE|EKKRKLQD|AQKKKLQD)|DEKEKNRRKQLEN|KAP(?:RKA(?:NYFIYKS|DYFRNIS)|KDAHYFLKSS|QDANYFRNVS))|N(?:RKEKGKLQTNLKN|GDYK(?:EKVSNNL(?:RA|KT)|KKVSNNLKT)|HYKDD(?:NGS(?:GNYYK|ENYYK)|D(?:GSGNYYK|ISGNY(?:SK|YK)))|SDDKVE(?:NGLKKVF|KGLREVF)|Y(?:Y(?:NNTGNN(?:V(?:NYAK|DYVK)|ANYAK)|ADGDKSGNYYK)|NYDEDGSGNY(?:YK|VK))|N(?:HDNVE(?:NGL(?:REVF|KAVF)|KGLK(?:KVF|AVF))|D(?:ND(?:RVKKEKLQN|K(?:IKK(?:GKLRG|EKL(?:RG|QE))|VKKEKL(?:RG|QN)))|DDKIKKGKLRG|V(?:EKGL(?:DVVFKK|KVVFKK)|VKGLDVVFKK))|E(?:SE(?:IKRKEKLRG|KKKREELQG)|TDKEQKVKLEK|KDM(?:REKQKLQS|TEKQKLQS))|VDAVQEGLKVVF|KE(?:NEKLQENLKR|KEKIEKSLQN))|TVDK(?:IHEGLKVVF|VHEGLKVVF)|D(?:NVE(?:NGLREVFKK|KGLK(?:KVFDK|AVFRK))|DVEKGLKIVFEK|EDDVEKGLKIVF|K(?:D(?:YVENGLKKVF|A(?:V(?:RHGLKVVF|QKGLRAVF)|AQKVLRTVF))|EKDQRKKLDE(?:N|I)|VE(?:NGL(?:REVFKK|KKVFDK)|KGL(?:REVF(?:RK|KK)|QVVFGK)))|QD(?:DVEKGLKIVF|EVWNGLRSVF)|ADKV(?:EKGLQVVF|QKGLQVVF))|P(?:E(?:DKVHEGLKVVF|VE(?:NGLREVFNK|KGLKAVFRK))|QDKVQ(?:EGLK(?:NVF|VVF)|KGLREVF))|E(?:MVEIGLKKVFKK|HYKEVKNGNYVK|NVEKGL(?:K(?:IVFEK|KVFDK)|QVVFGK)|D(?:DKVQKGLQVVF|VEKGLKVVF(?:KK|QK))|EDAVQKGL(?:RAVF|K(?:VVF|KVF))|K(?:DAVQNGLKKVF|VE(?:YGLRKLFKK|IGLKKVF(?:DK|EK|KK)))|QDEVWKGLRDVF)|VH(?:YK(?:DDGS(?:GNYYK|ENYYK)|EVKNGNYVK)|DKVE(?:RGLREVF|TGLREVF|KGL(?:REVF|QVV(?:F|L))))|K(?:HDNIEKGLREVF|N(?:NVPL(?:HNLSLDK|DKLSLDK)|K(?:SPLDKLSLEQ|PPLDKLSVDK)|VEIGLK(?:NVFKN|KVFDK))|DDK(?:IEKSLRAIF|
V(?:EKGLRAIF|QKGL(?:RAVF|KAVF|QVVF)))|Q(?:RKKILQEKLEN|EKEKREKLDEN))|FSNPKCGH(?:DEGIV|KQGNV)|QEDKVQEGLKVVF|LILTHPKCGHDTD)|I(?:SYYNADEKGNFYK|HNYDDNGSGNYYK|E(?:TRY(?:GSDTTNYYQ|ENDGPNYYQ)|ARYKKDDDNYYQ)|VSFDQCGHND(?:MDV|VDV)|KNDKTLNNLSNGQ|FSNEHCGHKQGSV)|T(?:SEGQCGHNDKMRP|HYADEDGS(?:GNYVK|ENYYK)|D(?:N(?:D(?:EVW(?:TGLRSVF|KGL(?:RSVF|GSVF))|AVQKGLRAVF)|VE(?:NGLREVFKK|KGLRAVFGK))|K(?:D(?:YVENGLK(?:KVF|AVF)|DVENGLREVF|EV(?:KEGLKVVF|WKGLRAVF)|AVQKGLRAVF)|VENGLK(?:EVFDK|KVFD(?:N|K))))|E(?:GYCGRNENNGYP|TLYKDEEGNYLK|KDDVEKGLKIVF)|V(?:S(?:SNKCGHNDMDV|NAKRREGDENP|FDQCGHNDM(?:HV|DV))|K(?:GTYKDDPYYYK|ETYKDDPNYYK))|F(?:S(?:GYWCGHYEGAP|NDYCGHGEHEV)|TYTKCGHDENKV))|D(?:SRTDKLEENLRKI|NNSDKLRDLSVDK|DDVEKGLKIVFEK|K(?:GEKKKLEKNLKD|NRGKLGALSLDD))|P(?:SY(?:IKCGHNNKDDP|LKCGHNNKDDP)|HYTNDRGLADYVK)|E(?:HY(?:EDVDGSGNYLK|KDVDGSGNYYK)|YY(?:NDTNNKINYVK|EDKDPDKNYYQ)|KNY(?:YNDGTGNYYK|PDDGSGNYSK)|FT(?:GGYCGRDETDV|SGYCGRNETNV))|V(?:NGNDKLESNLKKI|KAHY(?:KKDAPYYYK|QKDAPNYYK)|FSNRQCGHYED(?:VP|AP))|K(?:RYYNDDTD(?:NN(?:FYQ|LYQ)|DNFYQ)|GINDYDGDPNYYK|SYY(?:NAD(?:GEGNFYK|EKGNYYK)|DADEKGNYYK)|HY(?:TDTHGSIDYDK|A(?:H(?:GDGSGNY(?:SK|YK|LK)|DDGS(?:GNYYK|VNYYK))|DEDGSGNYYK))|YYNDTNNKINYVK|N(?:Y(?:YNPD(?:G(?:SGNYYK|AGNYYK)|EAGNYYK)|NYD(?:EDGPEYYK|KDGPEYYK))|NDRTLNNLSIGQ|DYNPDGSGNY(?:YK|FK)|E(?:SE(?:IKRKEKLQR|K(?:RTKEKLQG|NTKKKLQG))|NTDLNKLTTEK)|KNTKLSTLTLEK|AYPDDG(?:SGNY(?:YK|FK)|FGNYYK))|T(?:SN(?:SN(?:MDTLSLEQ|LKELSLDK)|TNMNTLSLDK)|IYADLKDVEIDD)|I(?:NDYDGDGPEYYK|THYDDISGNYYK|KDYDGDGPEYYK)|D(?:HY(?:KDEKDGNFFQ|QDDGTGNYYK)|YYNADEKGNYYK)|PHYKDDGFGNYYK|E(?:YYQDDGTGNYYK|ISDYDNDPNYYK|EYGDLKDVPIDD|KYGDLKDVPIDD)|V(?:HYKENKDGNY(?:YK|VK)|KY(?:PDL(?:ND(?:IEIDD|VEIDD)|KDLQIDD)|QDLKDVEIDD))|K(?:HY(?:ENDTDKNYYQ|KKDEDPNYYK)|VYPEDVTGNY(?:YK|FK)|KKKGLSELSTEK)|FS(?:SDRCGHNEGDP|ERKCGH(?:NEGSP|DENAP))|Q(?:HYK(?:DDGSVNYYK|EDKDENYYK)|NNKKLKDLTDKH)|A(?:RYKDRKDPNYYK|K(?:YEDLKTLPIDD|ERYKDIKNYYQ)))|F(?:S(?:S(?:SGPCGRDEAPV|HGKCGHNEGAP|DRCGHNNNDGP|E(?:GKCGHKEGTV|YCGHYKNGDP)|QGQCGHTEGTV)|N(?:RGPCGRNETDV|S(?:G(?:TRGRKELTV|PCGRKELTV|KCGGKEAPV)|KCGHH
NNDGP)|N(?:GPCGRNETDV|KCGHSNGGDP)|PKCGHSNGGDP|E(?:HCGH(?:HNNDDP|YKNGDP)|YCGH(?:YKNGDP|KKNEDP)))|DNG(?:HCGRNETNV|PCGRKELIV))|T(?:RQGYCGHSETNV|GGGQC(?:RRNDNSV|GRNETDV)|S(?:HGKCG(?:RNETNV|HSEGAP)|IGKCGHNKGSV|EG(?:RCGHSETNV|KCG(?:RNETNV|HNDNRV)|Q(?:RGHSETNV|C(?:RRNDNSV|GH(?:SETNV|NDKSV|DENKV))))|VGYCGHNKG(?:SV|IR)|QG(?:YCG(?:RKE(?:LTV|APV)|HSETNV)|QCG(?:RNERNV|H(?:SETNV|TEGTV|KEGTV)))|AGKCR(?:RNDNSV|HNDNSV))|NDGKCG(?:RYEGAP|H(?:YE(?:GAP|NNI|D(?:NV|AP))|TEGTV))|TEGYCGR(?:NEGAP|DEGAP)|D(?:GHCGRTQEGHV|IGKCG(?:GKEAPV|H(?:GDKDV|N(?:EGAP|KGSV)|KQGNV))|DGKCGHYEGAP))|ENAGKC(?:RRNDNKV|GHNDNRV)|WDRKCGHSN(?:GGDP|EGA(?:P|L))|LYPKCGHNNKNDL)|Q(?:GIIDYDNDPNYYK|NYY(?:KDDPKKNYYK|ADDGSGNY(?:SK|VK))|ISDY(?:TGDHPNYYK|DGDGPEYYK)|K(?:SDSSLQRLSIEK|HYEDDGSGNYYK|NNSALKKLTDKQ|IY(?:EDINNLPIDD|KDLNNLPIDD))|QNNNTLENLTDKQ)|L(?:K(?:TRYKKDDDNYYQ|KHYQKDAPNYYK)|F(?:S(?:N(?:RQCGH(?:GEHEV|NEGAP|DENKV|EQGNV)|SKCGH(?:RQGNV|DE(?:SKV|NKV)|EQGNV)|YKCGHYEDAP|DYCGHKQGNV|PKCGH(?:EQG(?:NV|TV)|KQGKV)|AYCGHYEGSP)|D(?:GHCGNKDGTV|HKCGHEESRV|YKCGHYE(?:GSP|DAP)))|DYNCGHHKDNNV|WDRKCGHDERNV)|Q(?:TRYTNDGDNYFK|ERYNDPKGDFFQ|ARYKKDGDDFFK)|WNDKCGHHVDKDV|LFSNYKCGHYEGS)|A(?:RYKKDEEDGNYY(?:K|Q)|VSSNKCGHNDMNV|KNDYTGDHPNYYK|LKHYKDDTKNYYQ))/
94
+ block_sharing = 1
95
+ when self =~ /(?:RE(?:PGKQHLEERLER|KGKSRLEARLKT)|G(?:G(?:HYKNCHCIGGDV|TYKNCRCASGNV)|P(?:NQEKKLLENKLK|DQEKKKLEENLR)|ANAIKAGDNVSIV)|S(?:FTNGQCGRDGENV|WYPKCGHHVKQDV|AKEHYQDTENYYK)|H(?:NRRKEKLETRLEE|E(?:PG(?:IQ(?:HLEKRLES|YLEKRLES)|KQHL(?:GERLEQ|EERLE(?:R|Q)))|QG(?:YNRLEARLKT|NNRLEARLKT|INRLEARLKT))|QQRKHLLEKRLET)|C(?:RAEEK(?:GTYFKNRE|D(?:TYFKNRE|IYSKTTD)|EIYSKTTD)|G(?:TEDKDTYFIKSG|VEENAKYFRESS|A(?:GMKDIYSKTMN|SEDAKYKVIGP|T(?:MNDIFSKNIR|VDDI(?:SSKNIR|FSKNIR))|P(?:SDAQYFRNTC|KEAKYFRKTA)))|HAPPDAQYTKKGP|NA(?:P(?:TGADYFVYKP|KDANYFEYNS)|WGNTYFRKTCS)|DAG(?:QKDTYFKQSS|AADEYFKKSG)|EA(?:GTSDKYFRKTA|KSDDKYNVIGP)|KA(?:NDDAEYFRKKD|P(?:DKANYF(?:EPPK|KPPK)|EEDHYFKPAQ)|EVDDIYSKTAN|K(?:EGDIYSKT(?:MN|AN)|KG(?:GIYSKTMN|DIYSKTMN))))|Y(?:SQKYKDEKSKLEE|NE(?:TDKVQKAILQQ|KDQEEKRKLQE))|N(?:YYEDNDTDKNYYQ|NNAAKLSELSTAQ|THESAQRKKLEEN|DEEKKKRDELEKN|KN(?:NPPLYKLSLEK|KSPLDKLSLDK))|T(?:RYKKDDEDGNFFQ|PTQGKCHCIDGT(?:N|V)|L(?:FDYKCGHDENAP|WNEKCGHGDYNL))|I(?:EHYKDDPEENFYE|KS(?:NYNDSEGNYFK|QYDDNEGNYFK)|LFDYKCAHDNDKV)|D(?:RKEKVKLEENLKN|KGEKKKLEENLKN|Q(?:ERK(?:HLLEKRLET|QHLEKRLET)|QEK(?:LYLENNLKK|AKLENNLKR))|AKKHYGDDENYYK)|P(?:NKCRCEDANADQV|CSVQKCTCINGDP)|E(?:SN(?:M(?:GQCRCFSGDP|VQCRCFSGDP)|KGQCRCFSGDP)|THGYCRCVNRVDV|F(?:SGGKCGHKDNNV|T(?:GGQCGRDGENV|DGHCGH(?:RQGNV|NEENV))))|VKDRYQNDGPDFFK|K(?:S(?:SYNDDGTGNYFK|YYKNDNDRNYFK)|N(?:HYNDTSKNYYK|NNN(?:ELNNLSLDK|KLSNLSTKE))|D(?:HYKGDEANNYFQ|NNTKLNDLSIQE)|K(?:TNPALKSFTNEE|KLEENLRNIFKN)|FSNPKCGHNEGSP)|F(?:SNDQCGHNN(?:RGDP|GGAP)|WYPKC(?:GHHV(?:RQDV|KQ(?:DV|EV))|SHHVKQDV))|Q(?:RNNIKLQ(?:NIPLHE|TLTLHQ)|N(?:NNTKLQNIPLHE|KNENLKSLSLDK)|KENGDINTLKPEE)|L(?:FYYKCGHYVYKDV|WN(?:Y(?:NCGHHVN(?:RDV|QDV)|KCGHHVNQDV)|DKCGHHVKQDV))|ARDHYNDTSGNYYQ)/
96
+ block_sharing = 2
97
+ else
98
+ block_sharing = 0
99
+ end
100
+ block_sharing
101
+ end
151
102
 
152
- #get the block sharing group for this tag
153
- #puts seq.bs_group #to be implemented
103
+ #distinct sequence identifier (DSID)
104
+ def dsid
105
+ "#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
106
+ end
154
107
 
155
- #get the length of the tag
156
- #puts seq.size
108
+ #position specific polymorphic block 1
109
+ def pspb1(anchor_pos=0,win_len=14)
110
+ self[14 + anchor_pos,win_len]
111
+ end
157
112
 
158
- #get the pspb1
159
- #puts seq.pspb1(0,14)
113
+ #position specific polymorphic block 2
114
+ def pspb2(anchor_pos=0,win_len=14)
115
+ if !ww_missing?
116
+ return self[ww_pos - 4 - anchor_pos - win_len, win_len]
117
+ elsif !vw_missing?
118
+ return self[vw_pos - 12 - win_len - anchor_pos, win_len]
119
+ else
120
+ return '....'
121
+ end
122
+ end
160
123
 
161
- #get the pspb2
162
- #puts seq.pspb2(0,14)
124
+ #position specific polymorphic block 3
125
+ def pspb3(anchor_pos=0,win_len=14)
126
+ if !ww_missing?
127
+ return self[ww_pos + 14 + anchor_pos, win_len]
128
+ elsif !vw_missing?
129
+ return self[vw_pos + 6 + anchor_pos, win_len]
130
+ else
131
+ return '....'
132
+ end
133
+ end
163
134
 
164
- #get the pspb3
165
- #puts seq.pspb3(0,14)
135
+ #position specific polymorphic block 4
136
+ def pspb4(anchor_pos=0,win_len=14)
137
+ self[self.length - 12 - win_len - anchor_pos, win_len]
138
+ end
166
139
 
167
- #get the pspb4
168
- #puts seq.pspb4(0,14)
140
+ private
141
+ def accepted_length
142
+ 100..168
143
+ end
169
144
 
145
+ def ww_missing?
146
+ true unless self =~ /WW/i
147
+ end
170
148
 
171
- #if input file is a fasta file
172
- #seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
149
+ def vw_missing?
150
+ true unless self =~ /VW/i
151
+ end
173
152
 
174
- #read the file
175
- #Bio::FlatFile.open(seq_file).each do |entry|
176
- #tag = Bio::Sequence::AA.new(entry.seq)
177
- #puts tag.start_motif
178
- #puts tag.end_motif
179
- #puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
180
- #end
153
+ def vw_ww_missing?
154
+ true if ww_missing? && vw_missing?
155
+ end
156
+ end
@@ -1,32 +1,54 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "Dbla" do
4
- context 'a group4 Dbla tag' do
5
- before(:each) do
6
- seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
7
- @tag = Bio::Sequence::AA.new(seq)
8
- end
9
-
10
- it "should return the number of cysteines" do
11
- @tag.cys_count.should == 4
12
- end
4
+ context 'Group4 Dbla tag' do
5
+ before(:each) do
6
+ seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
7
+ @tag = Bio::Sequence::AA.new(seq)
8
+ end
13
9
 
14
- it 'should return a dsid' do
15
- @tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
16
- end
10
+ it "should return the number of cysteines" do
11
+ @tag.cys_count.should == 4
12
+ end
17
13
 
18
- it 'should return the cyspolv group' do
19
- @tag.cyspolv_group.should == 4
20
- end
14
+ it 'should return a dsid' do
15
+ @tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
16
+ end
21
17
 
22
- it 'should return the length' do
23
- @tag.length.should == 126
24
- end
18
+ it 'should return the cyspolv group' do
19
+ @tag.cyspolv_group.should == 4
20
+ end
21
+
22
+ it 'should return the length' do
23
+ @tag.length.should == 126
24
+ end
25
+
26
+ it 'should return the start motif' do
27
+ @tag.start_motif.should == 'YIGDI'
28
+ end
29
+
30
+ it 'should return false for var1' do
31
+ @tag.is_var1?.should be_false
32
+ end
25
33
 
26
- it 'should return the start motif' do
27
- @tag.start_motif == 'YIGDI'
28
34
  end
29
35
 
30
- end
31
- end
36
+ context 'Group2 Dbla tag' do
37
+ before(:each) do
38
+ seq = 'DIGDIVRGTDLFLGGPSQEKKKLEENLKKILENIKNKNTKLSTLTLEKVREYWWALNRNDVWKALTCSAPYEAQYFIKSSDKEHSFSSEYCGHHNNDDPLTNLDYVPQFLR'
39
+ @tag2 = Bio::Sequence::AA.new(seq)
40
+ end
41
+
42
+ it 'should return the number of cysteines' do
43
+ @tag2.cys_count.should == 2
44
+ end
32
45
 
46
+ it 'should return the block sharing group 1' do
47
+ @tag2.bs_group.should == 1
48
+ end
49
+
50
+ it 'should return false for cp2 var1' do
51
+ @tag2.is_var1_cp2?.should be_false
52
+ end
53
+ end
54
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-dbla-classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-13 00:00:00.000000000Z
12
+ date: 2012-02-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
16
- requirement: &2155795540 !ruby/object:Gem::Requirement
16
+ requirement: &2152850760 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.4.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2155795540
24
+ version_requirements: *2152850760
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2155794860 !ruby/object:Gem::Requirement
27
+ requirement: &2152849320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,21 +32,21 @@ dependencies:
32
32
  version: 2.3.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2155794860
35
+ version_requirements: *2152849320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bundler
38
- requirement: &2155787960 !ruby/object:Gem::Requirement
38
+ requirement: &2152848240 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
42
42
  - !ruby/object:Gem::Version
43
- version: 1.0.0
43
+ version: 1.1.rc.7
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2155787960
46
+ version_requirements: *2152848240
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: jeweler
49
- requirement: &2155786180 !ruby/object:Gem::Requirement
49
+ requirement: &2152846960 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.6.4
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155786180
57
+ version_requirements: *2152846960
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rcov
60
- requirement: &2155784980 !ruby/object:Gem::Requirement
60
+ requirement: &2152855080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155784980
68
+ version_requirements: *2152855080
69
69
  description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
70
70
  email: georgkam@gmail.com
71
71
  executables: []
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
103
  version: '0'
104
104
  segments:
105
105
  - 0
106
- hash: -1659999011793222104
106
+ hash: 3330781070760048632
107
107
  required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  none: false
109
109
  requirements:
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  requirements: []
114
114
  rubyforge_project:
115
- rubygems_version: 1.8.10
115
+ rubygems_version: 1.8.12
116
116
  signing_key:
117
117
  specification_version: 3
118
118
  summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags