bio-dbla-classifier 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +40 -39
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bio-dbla-classifier.gemspec +5 -5
- data/lib/bio-dbla-classifier.rb +2 -2
- data/lib/bio/sequence/aa/{dbla.rb → aa.rb} +26 -5
- data/spec/{dbla_spec.rb → aa_spec.rb} +5 -1
- data/spec/spec_helper.rb +1 -1
- metadata +16 -16
data/README.rdoc
CHANGED
@@ -1,57 +1,58 @@
|
|
1
1
|
= bio-dbla-classifier
|
2
2
|
|
3
|
-
DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
|
3
|
+
DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
|
4
4
|
certain sequence motifs within the tag region (Bull et al 2007). DBLa adds methods for grouping DBL-alpha amino acid sequence tags.
|
5
5
|
The DBLa class is a subclass of Bio::Sequence::AA. If you apply this method, please cite this article:
|
6
6
|
Bull et al. “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July 2007): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
|
7
7
|
|
8
8
|
= Installation
|
9
|
-
|
9
|
+
|
10
10
|
gem install bio-dbla-classifier
|
11
11
|
|
12
12
|
= Uninstall
|
13
|
-
|
13
|
+
|
14
14
|
gem uninstall bio-dbla-classifier
|
15
15
|
|
16
16
|
= Usage
|
17
|
-
require 'bio'
|
18
17
|
require 'bio-dbla-classifier'
|
19
18
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
19
|
+
#create an instance of a new Bio::Sequence::AA class. This class simply extends the Bio::Sequence::AA class with methods
|
20
|
+
#to classify and describe Dbla tags.
|
21
|
+
|
22
|
+
#seq1 = 'DIGDIIRGRDLYSGNNKEKEQRKKLEKNGKTIVGKIYNEATNGQALQARYKGDDNNNYSKLREDRWTANRATIWEAITCDDDNKLSNASYVRPTSTDGQSGAQGKDKCRSANKTTGNTGDVNIVPTYFDYVPQYLR'
|
23
|
+
#seq = Bio::Sequence::AA.new(seq1)
|
24
|
+
|
25
|
+
#get the positions of limited variability
|
26
|
+
#puts seq.polv1
|
27
|
+
#puts seq.polv2
|
28
|
+
#puts seq.polv3
|
29
|
+
#puts seq.polv4
|
30
|
+
|
31
|
+
#get the number of cysteines in the tag
|
32
|
+
#puts seq.cys_count
|
33
|
+
|
34
|
+
#get the distinct sequence identifier
|
35
|
+
#puts seq.dsid
|
36
|
+
|
37
|
+
#get the cyspolv group for this tag
|
38
|
+
#puts seq.cyspolv_group
|
39
|
+
|
40
|
+
|
41
|
+
#get the block sharing group for this tag
|
42
|
+
#puts seq.bs_group #to be implemented
|
43
|
+
|
44
|
+
#get the length of the tag
|
45
|
+
#puts seq.size
|
46
|
+
|
47
|
+
#if input file is a fasta file
|
48
|
+
#seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
|
49
|
+
|
50
|
+
#read the file
|
51
|
+
#Bio::FlatFile.open(seq_file).each do |entry|
|
52
|
+
#tag = Bio::Sequence::AA.new(entry.seq)
|
53
|
+
#puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
54
|
+
#end
|
55
|
+
|
55
56
|
= Copyright
|
56
57
|
|
57
58
|
See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ Jeweler::Tasks.new do |gem|
|
|
18
18
|
gem.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
19
19
|
gem.license = "Ruby"
|
20
20
|
gem.summary = %Q{Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach}
|
21
|
-
gem.description = %Q{A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al
|
21
|
+
gem.description = %Q{A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2007}
|
22
22
|
gem.email = "georgkam@gmail.com"
|
23
23
|
gem.authors = ["George Githinji"]
|
24
24
|
# dependencies defined in Gemfile
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/bio-dbla-classifier.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-dbla-classifier"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["George Githinji"]
|
12
|
-
s.date = "2011-
|
13
|
-
s.description = "A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al
|
12
|
+
s.date = "2011-10-10"
|
13
|
+
s.description = "A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2007"
|
14
14
|
s.email = "georgkam@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE.txt",
|
@@ -27,9 +27,9 @@ Gem::Specification.new do |s|
|
|
27
27
|
"VERSION",
|
28
28
|
"bio-dbla-classifier.gemspec",
|
29
29
|
"lib/bio-dbla-classifier.rb",
|
30
|
-
"lib/bio/sequence/aa/
|
30
|
+
"lib/bio/sequence/aa/aa.rb",
|
31
|
+
"spec/aa_spec.rb",
|
31
32
|
"spec/bio-dbla-classifier_spec.rb",
|
32
|
-
"spec/dbla_spec.rb",
|
33
33
|
"spec/spec_helper.rb"
|
34
34
|
]
|
35
35
|
s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
|
data/lib/bio-dbla-classifier.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'bio'
|
2
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), 'bio','sequence/aa/aa')
|
@@ -1,4 +1,16 @@
|
|
1
|
-
class
|
1
|
+
class Bio::Sequence::AA
|
2
|
+
|
3
|
+
def has_accepted_length?
|
4
|
+
true if accepted_length.include? self.length
|
5
|
+
end
|
6
|
+
|
7
|
+
def start_motif
|
8
|
+
self[0,5]
|
9
|
+
end
|
10
|
+
|
11
|
+
def end_motif
|
12
|
+
self[-5,self.length]
|
13
|
+
end
|
2
14
|
|
3
15
|
def dsid
|
4
16
|
"#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
|
@@ -36,14 +48,14 @@ class Dbla < Bio::Sequence::AA
|
|
36
48
|
#The third position of limited variability(polv3)
|
37
49
|
def polv3
|
38
50
|
if self =~ /WW/
|
39
|
-
polv3 = self[ww_pos + 10,4]
|
51
|
+
polv3 = self[ww_pos + 10,4]
|
40
52
|
elsif self =~ /VW/
|
41
53
|
polv3 = self[vw_pos + 2,4]
|
42
54
|
else
|
43
55
|
end
|
44
56
|
polv3
|
45
57
|
end
|
46
|
-
|
58
|
+
|
47
59
|
#The fourth position of limited variability(polv4)
|
48
60
|
def polv4
|
49
61
|
self[self.length - 12,4]
|
@@ -68,6 +80,13 @@ class Dbla < Bio::Sequence::AA
|
|
68
80
|
end
|
69
81
|
group
|
70
82
|
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
def accepted_length
|
87
|
+
100..168
|
88
|
+
end
|
89
|
+
|
71
90
|
end
|
72
91
|
|
73
92
|
|
@@ -75,7 +94,7 @@ end
|
|
75
94
|
#to classify and describe Dbla properties
|
76
95
|
|
77
96
|
#seq1 = 'DIGDIIRGRDLYSGNNKEKEQRKKLEKNGKTIVGKIYNEATNGQALQARYKGDDNNNYSKLREDRWTANRATIWEAITCDDDNKLSNASYVRPTSTDGQSGAQGKDKCRSANKTTGNTGDVNIVPTYFDYVPQYLR'
|
78
|
-
#seq =
|
97
|
+
#seq = Bio::Sequence::AA.new(seq1)
|
79
98
|
|
80
99
|
#get the positions of limited variability
|
81
100
|
#puts seq.polv1
|
@@ -104,6 +123,8 @@ end
|
|
104
123
|
|
105
124
|
#read the file
|
106
125
|
#Bio::FlatFile.open(seq_file).each do |entry|
|
107
|
-
#tag =
|
126
|
+
#tag = Bio::Sequence::AA.new(entry.seq)
|
127
|
+
#puts tag.start_motif
|
128
|
+
#puts tag.end_motif
|
108
129
|
#puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
|
109
130
|
#end
|
@@ -4,7 +4,7 @@ describe "Dbla" do
|
|
4
4
|
context 'a group4 Dbla tag' do
|
5
5
|
before(:each) do
|
6
6
|
seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
|
7
|
-
@tag =
|
7
|
+
@tag = Bio::Sequence::AA.new(seq)
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should return the number of cysteines" do
|
@@ -23,6 +23,10 @@ describe "Dbla" do
|
|
23
23
|
@tag.length.should == 126
|
24
24
|
end
|
25
25
|
|
26
|
+
it 'should return the start motif' do
|
27
|
+
@tag.start_motif == 'YIGDI'
|
28
|
+
end
|
29
|
+
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-dbla-classifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-10-10 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement: &
|
16
|
+
requirement: &2155406420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2155406420
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2155405820 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.3.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2155405820
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &2155405200 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2155405200
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &2155404460 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.6.4
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2155404460
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rcov
|
60
|
-
requirement: &
|
60
|
+
requirement: &2155403800 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,9 +65,9 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2155403800
|
69
69
|
description: A classification system for DBL-alpha sequence tags using the CysPolv
|
70
|
-
approach described by Bull et al
|
70
|
+
approach described by Bull et al 2007
|
71
71
|
email: georgkam@gmail.com
|
72
72
|
executables: []
|
73
73
|
extensions: []
|
@@ -85,9 +85,9 @@ files:
|
|
85
85
|
- VERSION
|
86
86
|
- bio-dbla-classifier.gemspec
|
87
87
|
- lib/bio-dbla-classifier.rb
|
88
|
-
- lib/bio/sequence/aa/
|
88
|
+
- lib/bio/sequence/aa/aa.rb
|
89
|
+
- spec/aa_spec.rb
|
89
90
|
- spec/bio-dbla-classifier_spec.rb
|
90
|
-
- spec/dbla_spec.rb
|
91
91
|
- spec/spec_helper.rb
|
92
92
|
homepage: http://github.com/georgeG/bioruby-dbla-classifier
|
93
93
|
licenses:
|
@@ -104,7 +104,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
104
|
version: '0'
|
105
105
|
segments:
|
106
106
|
- 0
|
107
|
-
hash:
|
107
|
+
hash: 1540756780338965641
|
108
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
109
|
none: false
|
110
110
|
requirements:
|