bio-dbla-classifier 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ source "http://rubygems.org"
5
5
  # Needed to run rake, tests, features
6
6
  group :development do
7
7
  gem "rspec", "~> 2.3.0"
8
- gem "bundler", "~> 1.0.0"
8
+ gem "bundler", "~> 1.1.rc.7"
9
9
  gem "jeweler", "~> 1.6.4"
10
10
  gem "rcov", ">= 0"
11
11
  end
@@ -24,7 +24,7 @@ PLATFORMS
24
24
 
25
25
  DEPENDENCIES
26
26
  bio (>= 1.4.2)
27
- bundler (~> 1.0.0)
27
+ bundler (~> 1.1.rc.7)
28
28
  jeweler (~> 1.6.4)
29
29
  rcov
30
30
  rspec (~> 2.3.0)
@@ -6,9 +6,8 @@ If you use this plugin please quote,
6
6
  Bull et al “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes..” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
7
7
 
8
8
  = Installation
9
- You need to have Ruby installed on your system. This plugin has been tested on Ruby 1.9.2-p290. See http://rubylang.info/ for
10
- information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed type the following command in the terminal to
11
- install the gem. This will install the bioruby gem if it is not already installed on your system.
9
+ Ruby must be installed on your system. See http://rubylang.info/ for information on Ruby and how to install it on your system. Once Ruby 1.9.2 is installed, type the following command in the terminal to
10
+ install the gem. This will install the bioruby gem if it is not already installed on your system. The plugin has been tested on Ruby 1.9.2-p290.
12
11
 
13
12
  gem install bio-dbla-classifier
14
13
 
@@ -41,34 +40,36 @@ install the gem. This will install the bioruby gem if it is not already installe
41
40
 
42
41
 
43
42
  #get the block sharing group for this tag
44
- #puts dbl_seq.bs_group #to be implemented
43
+ puts dbl_seq.bs_group #=> 1
45
44
 
46
45
  #get the length of the tag
47
46
  puts dbl_seq.size #=> 115
48
47
 
48
+ dbl_seq.is_var1? #=> false
49
+
49
50
  = Finding the Position Specific Polymorphic Blocks(PSPB)
50
51
 
51
- The pspb methods take 2 arguments, an anchor position and a window length that defines the length of the pspb
52
+ The pspb methods take 2 arguments, an anchor position and a window length that
53
+ defines the length of the pspb. The default anchor position is 0 and the default
54
+ window length is 14.
52
55
 
53
56
  #get pspb1
54
- puts seq.pspb1(0,14) #=> NPEVEKGLKAVFRK
57
+ puts seq.pspb1 #=> NPEVEKGLKAVFRK
55
58
 
56
59
  #get pspb2
57
- puts seq.pspb2(0,14) #=> THYADEDGSGNYVK
60
+ puts seq.pspb2 #=> THYADEDGSGNYVK
58
61
 
59
62
  #get pspb3
60
- puts seq.pspb3(0,14) #=> CKAPQSVHYFIKTS
63
+ puts seq.pspb3 #=> CKAPQSVHYFIKTS
61
64
 
62
65
  #get pspb4
63
- puts seq.pspb4(0,14) #=> FTSHGKCGRNETNV
64
-
65
- = Processing fasta files
66
+ puts seq.pspb4 #=> FTSHGKCGRNETNV
66
67
 
67
- If the input is a fasta file,
68
+ = Processing a flatfile, for example FASTA, GenBank or EMBL
68
69
 
69
70
  seq_file = "sequences.fasta"
70
71
 
71
- #Process each entry in the file
72
+ #for each entry in the file
72
73
  Bio::FlatFile.open(seq_file).each do |entry|
73
74
  tag = Bio::Sequence::AA.new(entry.seq)
74
75
  puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
@@ -76,5 +77,4 @@ If the input is a fasta file,
76
77
 
77
78
  = Copyright
78
79
 
79
- See LICENSE.txt for further details.
80
-
80
+ See LICENSE.txt for further details
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-dbla-classifier"
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["George Githinji"]
12
- s.date = "2011-10-13"
12
+ s.date = "2012-02-20"
13
13
  s.description = "Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags"
14
14
  s.email = "georgkam@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -35,7 +35,7 @@ Gem::Specification.new do |s|
35
35
  s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
36
36
  s.licenses = ["Ruby"]
37
37
  s.require_paths = ["lib"]
38
- s.rubygems_version = "1.8.10"
38
+ s.rubygems_version = "1.8.12"
39
39
  s.summary = "A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags"
40
40
 
41
41
  if s.respond_to? :specification_version then
@@ -44,20 +44,20 @@ Gem::Specification.new do |s|
44
44
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
45
  s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
46
46
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
47
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<bundler>, ["~> 1.1.rc.7"])
48
48
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
49
49
  s.add_development_dependency(%q<rcov>, [">= 0"])
50
50
  else
51
51
  s.add_dependency(%q<bio>, [">= 1.4.2"])
52
52
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
53
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
53
+ s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
54
54
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
55
55
  s.add_dependency(%q<rcov>, [">= 0"])
56
56
  end
57
57
  else
58
58
  s.add_dependency(%q<bio>, [">= 1.4.2"])
59
59
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
60
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.1.rc.7"])
61
61
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
62
62
  s.add_dependency(%q<rcov>, [">= 0"])
63
63
  end
@@ -1,180 +1,156 @@
1
1
  class Bio::Sequence::AA
2
2
 
3
- def has_accepted_length?
4
- true if accepted_length.include? self.length
5
- end
6
-
7
- def start_motif
8
- self[0,5]
9
- end
10
-
11
- def end_motif
12
- self[-5,self.length]
13
- end
14
-
15
- def dsid
16
- "#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
17
- end
18
-
19
- def ww_pos
20
- rindex("WW")
21
- end
22
-
23
- def vw_pos
24
- rindex("VW")
25
- end
26
-
27
- #number of cysteines
28
- def cys_count
29
- scan(/C/).size
30
- end
31
-
32
- #The first position of limited variability(polv1)
33
- def polv1
34
- self[10,4]
35
- end
36
-
37
- #The second position of limited variability(polv2)
38
- def polv2
39
- if self =~ /WW/
40
- polv2 = self[ww_pos - 4,4]
41
- elsif self =~ /VW/
42
- polv2 = self[vw_pos - 12,4]
43
- else
44
- error = 'WW or VW motif missing'
45
- end
46
- polv2 unless error
47
- end
48
-
49
- #The third position of limited variability(polv3)
50
- def polv3
51
- if self =~ /WW/
52
- polv3 = self[ww_pos + 10,4]
53
- elsif self =~ /VW/
54
- polv3 = self[vw_pos + 2,4]
55
- else
56
- error = 'WW or VW motif missing'
57
- end
58
-
59
- polv3 unless error
60
-
61
- end
62
-
63
- #The fourth position of limited variability(polv4)
64
- def polv4
65
- self[self.length - 12,4]
66
- end
67
-
68
- #Assigning dsid group based on cysteines coun and presence of
69
- #REY motif in polv2, MFK in polv1,
70
- def cyspolv_group
71
- case
72
- when cys_count > 4 || cys_count == 3 || cys_count < 2
73
- group = 6
74
- when cys_count == 4 && polv2 =~ /REY/i
75
- group = 5
76
- when cys_count == 4
77
- group = 4
78
- when cys_count == 2 && polv1 =~ /MFK/i
79
- group = 1
80
- when cys_count == 2 && polv2 =~ /REY/i
81
- group =2
82
- else
83
- group = 3
84
- end
85
- group
86
- end
87
-
88
- #position specific polymorphic block 1
89
- def pspb1(anchor_pos,win_len)
90
- self[14 + anchor_pos,win_len]
91
- end
92
-
93
- #position specific polymorphic block 2
94
- def pspb2(anchor_pos,win_len)
95
- if self =~ /WW/
96
- pspb2 = self[ww_pos - 4 - anchor_pos - win_len, win_len]
97
- elsif self =~ /VW/
98
- pspb2 = self[vw_pos - 12 - win_len - anchor_pos, win_len]
99
- else
100
- error = 'WW or VW motif missing'
101
- end
102
- pspb2
103
- end
104
-
105
- #position specific polymorphic block 3
106
- def pspb3(anchor_pos,win_len)
107
- if self =~ /WW/
108
- pspb3 = self[ww_pos + 14 + anchor_pos, win_len]
109
- elsif self =~ /VW/
110
- pspb3 = self[vw_pos + 6 + anchor_pos, win_len]
111
- else
112
- error = 'WW or VW motif missing'
113
- end
114
- pspb3
115
- end
116
-
117
- #position specific polymorphic block 4
118
- def pspb4(anchor_pos,win_len)
119
- self[self.length - 12 - win_len - anchor_pos, win_len]
120
- end
121
-
122
-
123
- private
124
- def accepted_length
125
- 100..168
126
- end
3
+ def has_accepted_length?
4
+ true if accepted_length.include? self.length
5
+ end
127
6
 
128
- end
7
+ def start_motif
8
+ self[0,5]
9
+ end
10
+
11
+ def end_motif
12
+ self[-5,self.length]
13
+ end
14
+
15
+ def ww_pos
16
+ rindex("WW")
17
+ end
18
+
19
+ def vw_pos
20
+ rindex("VW")
21
+ end
22
+
23
+ #number of cysteines
24
+ def cys_count
25
+ scan(/C/).size
26
+ end
27
+
28
+ #The first position of limited variability(polv1)
29
+ def polv1
30
+ self[10,4]
31
+ end
32
+
33
+ #The second position of limited variability(polv2)
34
+ def polv2
35
+ if !ww_missing?
36
+ return self[ww_pos - 4,4]
37
+ elsif !vw_missing?
38
+ return self[vw_pos - 12,4]
39
+ else
40
+ return '....'
41
+ end
42
+ end
129
43
 
130
- #create an instace of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
131
- #to classify and describe Dbla properties
44
+ #The third position of limited variability(polv3)
45
+ def polv3
46
+ if !ww_missing?
47
+ return self[ww_pos + 10,4]
48
+ elsif !vw_missing?
49
+ return self[vw_pos + 2,4]
50
+ else
51
+ return '....'
52
+ end
53
+ end
132
54
 
133
- #seq1 ='DIGDIVRGRDMFKSNPEVEKGLKAVFRKINNGLTPQAKTHYADEDGSGNYVKLREDWWKANRDQVWKAITCKAPQSVHYFIKTSHGTRGFTSHGKCGRNETNVPTNLDYVPQYLR'
134
- #seq = Bio::Sequence::AA.new(seq1)
55
+ #The fourth position of limited variability(polv4)
56
+ def polv4
57
+ self[self.length - 12,4]
58
+ end
135
59
 
136
- #get the positions of limited variability
137
- #puts seq.polv1
138
- #puts seq.polv2
139
- #puts seq.polv3
140
- #puts seq.polv4
60
+ #Assigning dsid group based on number of cysteines, presence of REY motif in polv2 and MFK in polv1.
61
+ def cyspolv_group
62
+ case
63
+ when cys_count > 4 || cys_count == 3 || cys_count < 2
64
+ group = 6
65
+ when cys_count == 4 && polv2 =~ /REY/i
66
+ group = 5
67
+ when cys_count == 4
68
+ group = 4
69
+ when cys_count == 2 && polv1 =~ /MFK/i
70
+ group = 1
71
+ when cys_count == 2 && polv2 =~ /REY/i
72
+ group =2
73
+ else
74
+ group = 3
75
+ end
76
+ group
77
+ end
141
78
 
142
- #get the number if cysteines in the tag
143
- #puts seq.cys_count
79
+ def is_var1_cp1?
80
+ return true if cyspolv_group == 1 && self =~ /NVHDKVEKGLREVF|NVHDKVETGLREVF/i
81
+ end
144
82
 
145
- #get the distinct sequence identifier
146
- #puts seq.dsid
83
+ def is_var1_cp2?
84
+ return true if cyspolv_group == 2 && self =~ /APNKEKIKLEENLKK/i
85
+ end
147
86
 
148
- #get the cyspolv group for this tag
149
- #puts seq.cyspolv_group
87
+ def is_var1?
88
+ return true if is_var1_cp1? || is_var1_cp2?
89
+ end
150
90
 
91
+ def bs_group
92
+ case
93
+ when self =~ /(?:R(?:HYADHDKSGNYYK|NENNNLGKLSNEQ|I(?:RHYDDGSGNY(?:SK|YK)|THYN(?:GVSGNCVK|DGSGNYVK))|E(?:RYKDLKDVEIDD|HYKEVKNGNY(?:YK|IK)|KYKDLKD(?:VEIDD|LPIDD))|VKETYKDDPNYYK|KNNSSLRKLTNEQ)|G(?:G(?:RGRK(?:KLEDNL(?:IE|KE)|QLEENLQK)|GGRKKLEDNLKE)|I(?:N(?:D(?:Y(?:ND(?:GSGNYFK|ISGNYYK)|D(?:RDGPE(?:HYK|YYK)|GDGPEYYK))|CDRDGPEYYK)|AYNDGSENYYK)|IDYDHDGPHYYK)|P(?:SQEK(?:IKLEENLK|KKLEENLK)|KQEKKELEENLK)|K(?:NYPDDGSGN(?:YYK|FYK)|K(?:YYND(?:G(?:SGNYYK|TGNYVK)|ETGNYYK)|KEKEKIYGNIE)|QYYNDENGNYYK)|QTYPDDGSGNYYK)|MESNANLKKHTLER|S(?:STNTQCRCATNDV|HY(?:TDTHGSIDYDK|ED(?:GDKSGNYYK|KDKSGNYIK)|ADHDKSGNY(?:YK|LK))|NKEKEKIENSLQN|DYKDDD(?:GSGNYYK|IDGNYY(?:K|Q))|E(?:GKCGHKETERDL|KVEYGLRKLFKK)|VQERYGNDPNFFQ|KI(?:NDYDGDPNYYK|TDYDNDPNYYK)|QGQCGRNENNGYP|F(?:S(?:SEYCGH(?:RQGS(?:V|A)|GDNEV|EQGNV)|N(?:GQCGHRDENV|SKCGHGEHEV|DYCGH(?:RQGSV|NENKV)|PKCGH(?:G(?:DNEV|EHEV)|NENKV|EQGNV)|EYCGHRQGSV)|D(?:RKCGHYEGAP|HKCGH(?:GDKDV|YEGAP|DENAP)))|TN(?:GQCGHNEENV|PKCG(?:RGDNEV|H(?:G(?:DNEV|EHEV)|DENKV)))|ADAYCGRGDENV)|LILPYSKCGRDTD)|H(?:HYKDDDISGNYSK|SKEKEKLQTNLKN|N(?:H(?:IKKPLLENLEQ|KKKPLL(?:DNLEK|ENLEQ))|NKKKALLDNLEK|QKKINLEKSLHR)|EQG(?:YNKLEAI(?:SKT|LKT)|NNKLEA(?:RLKT|ILKT))|QQRK(?:RKLEENLRN|GKLEENLRN))|C(?:RAP(?:N(?:GANYFRKGL|EANYFKNVA)|KNAHYFIKSS|QKANYFKNVA)|G(?:TGENDTYFKNSS|A(?:G(?:EKDTYF(?:TYS(?:N|K)|VQLD)|A(?:RDEYFIKPS|KDTYFTYSK))|TMNDIFSKNIG|LPKSAY(?:VLQSE|F(?:MQLE|LQSE))))|S(?:VPYEAYYFTYKS|A(?:G(?:PKDTYFIKSG|QKDTYFIKPN)|D(?:GSE(?:DYFIKSS|EYF(?:IQSE|KKQS))|DSEDYFI(?:RSE|QSE))|P(?:RDA(?:DYFIKNS|QYFIKSS)|GD(?:VNYFRK(?:GL|IS|ES|FS)|AKYVK(?:YFP|NFP))|HNAQYVKYVP|Y(?:GANYYRKYS|HPGYFRQSK|YADYF(?:RKGS|K(?:SVA|KK(?:S|P)))|NAHYFIKSS|CADYFKKKS|DANYVRRKS|EA(?:YYFTYKS|QYFIKSS)|KSQYFIKSS)|D(?:YAKYFRQTC|NAKY(?:VKYFP|FKPPK)))|QNNEVYFINSE))|Y(?:IPY(?:YVNYFK(?:NIS|DIS|K(?:T(?:S|P)|KS))|CVNYFKNIS)|APNNANYFIGSG)|NAP(?:GD(?:VHYFRKDP|AHYFRKDP)|Y(?:DANY(?:YR(?:KYS|QTC)|VRRKS|FRKTS)|EAQY(?:YIKSS|FIK(?:SS|PS))|K(?:SRYF(?:M(?:HSE|QSE)|IQSE)|A(?:QYYIKSS|WYFMHSE)))|NISGYFMQS(?:G|E)|D(?:NVNYFRKYS|KAEYFVYKS))|TAP(?:YGANYYRKYS|D(?
:NVNYFRKYS|KANYFIYKS))|IAPRDAHYFLKSS|D(?:TEESDTYFKQSS|A(?:SYKSGYFMQSE|P(?:RDA(?:HYFLKSS|NFFIKNS|DYF(?:RKGS|KNVA))|YKSRYF(?:MQSE|IQSE)|KDANYFIGSG|QKVDYFRK(?:GS|IS))))|EA(?:SKNANFFIK(?:NS|DS)|P(?:GDAHYFRKGP|ENAYIIKRRI|KDANYFIGSG|QKVDYFRKG(?:S|L)))|VA(?:GEGNTYFIQLD|PENAYFRKTEA)|KA(?:S(?:RNA(?:HYFLKSS|NYFRK(?:IS|AL))|KNANFFIKNS)|P(?:GDVN(?:YFRKIS|FFIKNS)|NGANYFRKKS|TGA(?:HYFLKSS|DYF(?:VYKP|KKKS))|PKVDYSRNIS|EDADYFRKGS|K(?:GANYFRKES|DA(?:HYFLKSS|N(?:YFIGSG|FFIKIS)|DYFRKGS))|Q(?:GANYFRNIS|SVHYFIKTS|DANYF(?:R(?:N(?:IS|VS)|K(?:GL|IS))|TKES)|K(?:VDYFRKGS|ANYFRKGS)))|KEGDIYSKTTD)|FA(?:HNTEEYFIKSE|DGSEEYFI(?:KSS|QS(?:S|E)))|AAR(?:GNDLYSKNIG|YHPGYFKKSD))|Y(?:NERDR(?:DKKRKLQE|EKKRKLQD|AQKKKLQD)|DEKEKNRRKQLEN|KAP(?:RKA(?:NYFIYKS|DYFRNIS)|KDAHYFLKSS|QDANYFRNVS))|N(?:RKEKGKLQTNLKN|GDYK(?:EKVSNNL(?:RA|KT)|KKVSNNLKT)|HYKDD(?:NGS(?:GNYYK|ENYYK)|D(?:GSGNYYK|ISGNY(?:SK|YK)))|SDDKVE(?:NGLKKVF|KGLREVF)|Y(?:Y(?:NNTGNN(?:V(?:NYAK|DYVK)|ANYAK)|ADGDKSGNYYK)|NYDEDGSGNY(?:YK|VK))|N(?:HDNVE(?:NGL(?:REVF|KAVF)|KGLK(?:KVF|AVF))|D(?:ND(?:RVKKEKLQN|K(?:IKK(?:GKLRG|EKL(?:RG|QE))|VKKEKL(?:RG|QN)))|DDKIKKGKLRG|V(?:EKGL(?:DVVFKK|KVVFKK)|VKGLDVVFKK))|E(?:SE(?:IKRKEKLRG|KKKREELQG)|TDKEQKVKLEK|KDM(?:REKQKLQS|TEKQKLQS))|VDAVQEGLKVVF|KE(?:NEKLQENLKR|KEKIEKSLQN))|TVDK(?:IHEGLKVVF|VHEGLKVVF)|D(?:NVE(?:NGLREVFKK|KGLK(?:KVFDK|AVFRK))|DVEKGLKIVFEK|EDDVEKGLKIVF|K(?:D(?:YVENGLKKVF|A(?:V(?:RHGLKVVF|QKGLRAVF)|AQKVLRTVF))|EKDQRKKLDE(?:N|I)|VE(?:NGL(?:REVFKK|KKVFDK)|KGL(?:REVF(?:RK|KK)|QVVFGK)))|QD(?:DVEKGLKIVF|EVWNGLRSVF)|ADKV(?:EKGLQVVF|QKGLQVVF))|P(?:E(?:DKVHEGLKVVF|VE(?:NGLREVFNK|KGLKAVFRK))|QDKVQ(?:EGLK(?:NVF|VVF)|KGLREVF))|E(?:MVEIGLKKVFKK|HYKEVKNGNYVK|NVEKGL(?:K(?:IVFEK|KVFDK)|QVVFGK)|D(?:DKVQKGLQVVF|VEKGLKVVF(?:KK|QK))|EDAVQKGL(?:RAVF|K(?:VVF|KVF))|K(?:DAVQNGLKKVF|VE(?:YGLRKLFKK|IGLKKVF(?:DK|EK|KK)))|QDEVWKGLRDVF)|VH(?:YK(?:DDGS(?:GNYYK|ENYYK)|EVKNGNYVK)|DKVE(?:RGLREVF|TGLREVF|KGL(?:REVF|QVV(?:F|L))))|K(?:HDNIEKGLREVF|N(?:NVPL(?:HNLSLDK|DKLSLDK)|K(?:SPLDKLSLEQ|PPLDKLSVDK)|VEIGLK(?:NVFKN|KVFDK))|DDK(?:IEKSLRAIF|
V(?:EKGLRAIF|QKGL(?:RAVF|KAVF|QVVF)))|Q(?:RKKILQEKLEN|EKEKREKLDEN))|FSNPKCGH(?:DEGIV|KQGNV)|QEDKVQEGLKVVF|LILTHPKCGHDTD)|I(?:SYYNADEKGNFYK|HNYDDNGSGNYYK|E(?:TRY(?:GSDTTNYYQ|ENDGPNYYQ)|ARYKKDDDNYYQ)|VSFDQCGHND(?:MDV|VDV)|KNDKTLNNLSNGQ|FSNEHCGHKQGSV)|T(?:SEGQCGHNDKMRP|HYADEDGS(?:GNYVK|ENYYK)|D(?:N(?:D(?:EVW(?:TGLRSVF|KGL(?:RSVF|GSVF))|AVQKGLRAVF)|VE(?:NGLREVFKK|KGLRAVFGK))|K(?:D(?:YVENGLK(?:KVF|AVF)|DVENGLREVF|EV(?:KEGLKVVF|WKGLRAVF)|AVQKGLRAVF)|VENGLK(?:EVFDK|KVFD(?:N|K))))|E(?:GYCGRNENNGYP|TLYKDEEGNYLK|KDDVEKGLKIVF)|V(?:S(?:SNKCGHNDMDV|NAKRREGDENP|FDQCGHNDM(?:HV|DV))|K(?:GTYKDDPYYYK|ETYKDDPNYYK))|F(?:S(?:GYWCGHYEGAP|NDYCGHGEHEV)|TYTKCGHDENKV))|D(?:SRTDKLEENLRKI|NNSDKLRDLSVDK|DDVEKGLKIVFEK|K(?:GEKKKLEKNLKD|NRGKLGALSLDD))|P(?:SY(?:IKCGHNNKDDP|LKCGHNNKDDP)|HYTNDRGLADYVK)|E(?:HY(?:EDVDGSGNYLK|KDVDGSGNYYK)|YY(?:NDTNNKINYVK|EDKDPDKNYYQ)|KNY(?:YNDGTGNYYK|PDDGSGNYSK)|FT(?:GGYCGRDETDV|SGYCGRNETNV))|V(?:NGNDKLESNLKKI|KAHY(?:KKDAPYYYK|QKDAPNYYK)|FSNRQCGHYED(?:VP|AP))|K(?:RYYNDDTD(?:NN(?:FYQ|LYQ)|DNFYQ)|GINDYDGDPNYYK|SYY(?:NAD(?:GEGNFYK|EKGNYYK)|DADEKGNYYK)|HY(?:TDTHGSIDYDK|A(?:H(?:GDGSGNY(?:SK|YK|LK)|DDGS(?:GNYYK|VNYYK))|DEDGSGNYYK))|YYNDTNNKINYVK|N(?:Y(?:YNPD(?:G(?:SGNYYK|AGNYYK)|EAGNYYK)|NYD(?:EDGPEYYK|KDGPEYYK))|NDRTLNNLSIGQ|DYNPDGSGNY(?:YK|FK)|E(?:SE(?:IKRKEKLQR|K(?:RTKEKLQG|NTKKKLQG))|NTDLNKLTTEK)|KNTKLSTLTLEK|AYPDDG(?:SGNY(?:YK|FK)|FGNYYK))|T(?:SN(?:SN(?:MDTLSLEQ|LKELSLDK)|TNMNTLSLDK)|IYADLKDVEIDD)|I(?:NDYDGDGPEYYK|THYDDISGNYYK|KDYDGDGPEYYK)|D(?:HY(?:KDEKDGNFFQ|QDDGTGNYYK)|YYNADEKGNYYK)|PHYKDDGFGNYYK|E(?:YYQDDGTGNYYK|ISDYDNDPNYYK|EYGDLKDVPIDD|KYGDLKDVPIDD)|V(?:HYKENKDGNY(?:YK|VK)|KY(?:PDL(?:ND(?:IEIDD|VEIDD)|KDLQIDD)|QDLKDVEIDD))|K(?:HY(?:ENDTDKNYYQ|KKDEDPNYYK)|VYPEDVTGNY(?:YK|FK)|KKKGLSELSTEK)|FS(?:SDRCGHNEGDP|ERKCGH(?:NEGSP|DENAP))|Q(?:HYK(?:DDGSVNYYK|EDKDENYYK)|NNKKLKDLTDKH)|A(?:RYKDRKDPNYYK|K(?:YEDLKTLPIDD|ERYKDIKNYYQ)))|F(?:S(?:S(?:SGPCGRDEAPV|HGKCGHNEGAP|DRCGHNNNDGP|E(?:GKCGHKEGTV|YCGHYKNGDP)|QGQCGHTEGTV)|N(?:RGPCGRNETDV|S(?:G(?:TRGRKELTV|PCGRKELTV|KCGGKEAPV)|KCGHH
NNDGP)|N(?:GPCGRNETDV|KCGHSNGGDP)|PKCGHSNGGDP|E(?:HCGH(?:HNNDDP|YKNGDP)|YCGH(?:YKNGDP|KKNEDP)))|DNG(?:HCGRNETNV|PCGRKELIV))|T(?:RQGYCGHSETNV|GGGQC(?:RRNDNSV|GRNETDV)|S(?:HGKCG(?:RNETNV|HSEGAP)|IGKCGHNKGSV|EG(?:RCGHSETNV|KCG(?:RNETNV|HNDNRV)|Q(?:RGHSETNV|C(?:RRNDNSV|GH(?:SETNV|NDKSV|DENKV))))|VGYCGHNKG(?:SV|IR)|QG(?:YCG(?:RKE(?:LTV|APV)|HSETNV)|QCG(?:RNERNV|H(?:SETNV|TEGTV|KEGTV)))|AGKCR(?:RNDNSV|HNDNSV))|NDGKCG(?:RYEGAP|H(?:YE(?:GAP|NNI|D(?:NV|AP))|TEGTV))|TEGYCGR(?:NEGAP|DEGAP)|D(?:GHCGRTQEGHV|IGKCG(?:GKEAPV|H(?:GDKDV|N(?:EGAP|KGSV)|KQGNV))|DGKCGHYEGAP))|ENAGKC(?:RRNDNKV|GHNDNRV)|WDRKCGHSN(?:GGDP|EGA(?:P|L))|LYPKCGHNNKNDL)|Q(?:GIIDYDNDPNYYK|NYY(?:KDDPKKNYYK|ADDGSGNY(?:SK|VK))|ISDY(?:TGDHPNYYK|DGDGPEYYK)|K(?:SDSSLQRLSIEK|HYEDDGSGNYYK|NNSALKKLTDKQ|IY(?:EDINNLPIDD|KDLNNLPIDD))|QNNNTLENLTDKQ)|L(?:K(?:TRYKKDDDNYYQ|KHYQKDAPNYYK)|F(?:S(?:N(?:RQCGH(?:GEHEV|NEGAP|DENKV|EQGNV)|SKCGH(?:RQGNV|DE(?:SKV|NKV)|EQGNV)|YKCGHYEDAP|DYCGHKQGNV|PKCGH(?:EQG(?:NV|TV)|KQGKV)|AYCGHYEGSP)|D(?:GHCGNKDGTV|HKCGHEESRV|YKCGHYE(?:GSP|DAP)))|DYNCGHHKDNNV|WDRKCGHDERNV)|Q(?:TRYTNDGDNYFK|ERYNDPKGDFFQ|ARYKKDGDDFFK)|WNDKCGHHVDKDV|LFSNYKCGHYEGS)|A(?:RYKKDEEDGNYY(?:K|Q)|VSSNKCGHNDMNV|KNDYTGDHPNYYK|LKHYKDDTKNYYQ))/
94
+ block_sharing = 1
95
+ when self =~ /(?:RE(?:PGKQHLEERLER|KGKSRLEARLKT)|G(?:G(?:HYKNCHCIGGDV|TYKNCRCASGNV)|P(?:NQEKKLLENKLK|DQEKKKLEENLR)|ANAIKAGDNVSIV)|S(?:FTNGQCGRDGENV|WYPKCGHHVKQDV|AKEHYQDTENYYK)|H(?:NRRKEKLETRLEE|E(?:PG(?:IQ(?:HLEKRLES|YLEKRLES)|KQHL(?:GERLEQ|EERLE(?:R|Q)))|QG(?:YNRLEARLKT|NNRLEARLKT|INRLEARLKT))|QQRKHLLEKRLET)|C(?:RAEEK(?:GTYFKNRE|D(?:TYFKNRE|IYSKTTD)|EIYSKTTD)|G(?:TEDKDTYFIKSG|VEENAKYFRESS|A(?:GMKDIYSKTMN|SEDAKYKVIGP|T(?:MNDIFSKNIR|VDDI(?:SSKNIR|FSKNIR))|P(?:SDAQYFRNTC|KEAKYFRKTA)))|HAPPDAQYTKKGP|NA(?:P(?:TGADYFVYKP|KDANYFEYNS)|WGNTYFRKTCS)|DAG(?:QKDTYFKQSS|AADEYFKKSG)|EA(?:GTSDKYFRKTA|KSDDKYNVIGP)|KA(?:NDDAEYFRKKD|P(?:DKANYF(?:EPPK|KPPK)|EEDHYFKPAQ)|EVDDIYSKTAN|K(?:EGDIYSKT(?:MN|AN)|KG(?:GIYSKTMN|DIYSKTMN))))|Y(?:SQKYKDEKSKLEE|NE(?:TDKVQKAILQQ|KDQEEKRKLQE))|N(?:YYEDNDTDKNYYQ|NNAAKLSELSTAQ|THESAQRKKLEEN|DEEKKKRDELEKN|KN(?:NPPLYKLSLEK|KSPLDKLSLDK))|T(?:RYKKDDEDGNFFQ|PTQGKCHCIDGT(?:N|V)|L(?:FDYKCGHDENAP|WNEKCGHGDYNL))|I(?:EHYKDDPEENFYE|KS(?:NYNDSEGNYFK|QYDDNEGNYFK)|LFDYKCAHDNDKV)|D(?:RKEKVKLEENLKN|KGEKKKLEENLKN|Q(?:ERK(?:HLLEKRLET|QHLEKRLET)|QEK(?:LYLENNLKK|AKLENNLKR))|AKKHYGDDENYYK)|P(?:NKCRCEDANADQV|CSVQKCTCINGDP)|E(?:SN(?:M(?:GQCRCFSGDP|VQCRCFSGDP)|KGQCRCFSGDP)|THGYCRCVNRVDV|F(?:SGGKCGHKDNNV|T(?:GGQCGRDGENV|DGHCGH(?:RQGNV|NEENV))))|VKDRYQNDGPDFFK|K(?:S(?:SYNDDGTGNYFK|YYKNDNDRNYFK)|N(?:HYNDTSKNYYK|NNN(?:ELNNLSLDK|KLSNLSTKE))|D(?:HYKGDEANNYFQ|NNTKLNDLSIQE)|K(?:TNPALKSFTNEE|KLEENLRNIFKN)|FSNPKCGHNEGSP)|F(?:SNDQCGHNN(?:RGDP|GGAP)|WYPKC(?:GHHV(?:RQDV|KQ(?:DV|EV))|SHHVKQDV))|Q(?:RNNIKLQ(?:NIPLHE|TLTLHQ)|N(?:NNTKLQNIPLHE|KNENLKSLSLDK)|KENGDINTLKPEE)|L(?:FYYKCGHYVYKDV|WN(?:Y(?:NCGHHVN(?:RDV|QDV)|KCGHHVNQDV)|DKCGHHVKQDV))|ARDHYNDTSGNYYQ)/
96
+ block_sharing = 2
97
+ else
98
+ block_sharing = 0
99
+ end
100
+ block_sharing
101
+ end
151
102
 
152
- #get the block sharing group for this tag
153
- #puts seq.bs_group #to be implemented
103
+ #distinct sequence identifier (DSID)
104
+ def dsid
105
+ "#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
106
+ end
154
107
 
155
- #get the length of the tag
156
- #puts seq.size
108
+ #position specific polymorphic block 1
109
+ def pspb1(anchor_pos=0,win_len=14)
110
+ self[14 + anchor_pos,win_len]
111
+ end
157
112
 
158
- #get the pspb1
159
- #puts seq.pspb1(0,14)
113
+ #position specific polymorphic block 2
114
+ def pspb2(anchor_pos=0,win_len=14)
115
+ if !ww_missing?
116
+ return self[ww_pos - 4 - anchor_pos - win_len, win_len]
117
+ elsif !vw_missing?
118
+ return self[vw_pos - 12 - win_len - anchor_pos, win_len]
119
+ else
120
+ return '....'
121
+ end
122
+ end
160
123
 
161
- #get the pspb2
162
- #puts seq.pspb2(0,14)
124
+ #position specific polymorphic block 3
125
+ def pspb3(anchor_pos=0,win_len=14)
126
+ if !ww_missing?
127
+ return self[ww_pos + 14 + anchor_pos, win_len]
128
+ elsif !vw_missing?
129
+ return self[vw_pos + 6 + anchor_pos, win_len]
130
+ else
131
+ return '....'
132
+ end
133
+ end
163
134
 
164
- #get the pspb3
165
- #puts seq.pspb3(0,14)
135
+ #position specific polymorphic block 4
136
+ def pspb4(anchor_pos=0,win_len=14)
137
+ self[self.length - 12 - win_len - anchor_pos, win_len]
138
+ end
166
139
 
167
- #get the pspb4
168
- #puts seq.pspb4(0,14)
140
+ private
141
+ def accepted_length
142
+ 100..168
143
+ end
169
144
 
145
+ def ww_missing?
146
+ true unless self =~ /WW/i
147
+ end
170
148
 
171
- #if input file is a fasta file
172
- #seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
149
+ def vw_missing?
150
+ true unless self =~ /VW/i
151
+ end
173
152
 
174
- #read the file
175
- #Bio::FlatFile.open(seq_file).each do |entry|
176
- #tag = Bio::Sequence::AA.new(entry.seq)
177
- #puts tag.start_motif
178
- #puts tag.end_motif
179
- #puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
180
- #end
153
+ def vw_ww_missing?
154
+ true if ww_missing? && vw_missing?
155
+ end
156
+ end
@@ -1,32 +1,54 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "Dbla" do
4
- context 'a group4 Dbla tag' do
5
- before(:each) do
6
- seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
7
- @tag = Bio::Sequence::AA.new(seq)
8
- end
9
-
10
- it "should return the number of cysteines" do
11
- @tag.cys_count.should == 4
12
- end
4
+ context 'Group4 Dbla tag' do
5
+ before(:each) do
6
+ seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
7
+ @tag = Bio::Sequence::AA.new(seq)
8
+ end
13
9
 
14
- it 'should return a dsid' do
15
- @tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
16
- end
10
+ it "should return the number of cysteines" do
11
+ @tag.cys_count.should == 4
12
+ end
17
13
 
18
- it 'should return the cyspolv group' do
19
- @tag.cyspolv_group.should == 4
20
- end
14
+ it 'should return a dsid' do
15
+ @tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
16
+ end
21
17
 
22
- it 'should return the length' do
23
- @tag.length.should == 126
24
- end
18
+ it 'should return the cyspolv group' do
19
+ @tag.cyspolv_group.should == 4
20
+ end
21
+
22
+ it 'should return the length' do
23
+ @tag.length.should == 126
24
+ end
25
+
26
+ it 'should return the start motif' do
27
+ @tag.start_motif.should == 'YIGDI'
28
+ end
29
+
30
+ it 'should return false for var1' do
31
+ @tag.is_var1?.should be_false
32
+ end
25
33
 
26
- it 'should return the start motif' do
27
- @tag.start_motif == 'YIGDI'
28
34
  end
29
35
 
30
- end
31
- end
36
+ context 'Group2 Dbla tag' do
37
+ before(:each) do
38
+ seq = 'DIGDIVRGTDLFLGGPSQEKKKLEENLKKILENIKNKNTKLSTLTLEKVREYWWALNRNDVWKALTCSAPYEAQYFIKSSDKEHSFSSEYCGHHNNDDPLTNLDYVPQFLR'
39
+ @tag2 = Bio::Sequence::AA.new(seq)
40
+ end
41
+
42
+ it 'should return the number of cysteines' do
43
+ @tag2.cys_count.should == 2
44
+ end
32
45
 
46
+ it 'should return the block sharing group 1' do
47
+ @tag2.bs_group.should == 1
48
+ end
49
+
50
+ it 'should return false for cp2 var1' do
51
+ @tag2.is_var1_cp2?.should be_false
52
+ end
53
+ end
54
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-dbla-classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-13 00:00:00.000000000Z
12
+ date: 2012-02-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
16
- requirement: &2155795540 !ruby/object:Gem::Requirement
16
+ requirement: &2152850760 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.4.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2155795540
24
+ version_requirements: *2152850760
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2155794860 !ruby/object:Gem::Requirement
27
+ requirement: &2152849320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,21 +32,21 @@ dependencies:
32
32
  version: 2.3.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2155794860
35
+ version_requirements: *2152849320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bundler
38
- requirement: &2155787960 !ruby/object:Gem::Requirement
38
+ requirement: &2152848240 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
42
42
  - !ruby/object:Gem::Version
43
- version: 1.0.0
43
+ version: 1.1.rc.7
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2155787960
46
+ version_requirements: *2152848240
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: jeweler
49
- requirement: &2155786180 !ruby/object:Gem::Requirement
49
+ requirement: &2152846960 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.6.4
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155786180
57
+ version_requirements: *2152846960
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rcov
60
- requirement: &2155784980 !ruby/object:Gem::Requirement
60
+ requirement: &2152855080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155784980
68
+ version_requirements: *2152855080
69
69
  description: Methods to classify and manipulate PfEMP1 DBL-alpha sequence tags
70
70
  email: georgkam@gmail.com
71
71
  executables: []
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
103
  version: '0'
104
104
  segments:
105
105
  - 0
106
- hash: -1659999011793222104
106
+ hash: 3330781070760048632
107
107
  required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  none: false
109
109
  requirements:
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  requirements: []
114
114
  rubyforge_project:
115
- rubygems_version: 1.8.10
115
+ rubygems_version: 1.8.12
116
116
  signing_key:
117
117
  specification_version: 3
118
118
  summary: A tool to classify and manipulate PfEMP1 DBL-alpha sequence tags