bio-dbla-classifier 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'bio', '>= 1.4.2'
4
+
5
+ # Needed to run rake, tests, features
6
+ group :development do
7
+ gem "rspec", "~> 2.3.0"
8
+ gem "bundler", "~> 1.0.0"
9
+ gem "jeweler", "~> 1.6.4"
10
+ gem "rcov", ">= 0"
11
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ bio (1.4.2)
5
+ diff-lcs (1.1.3)
6
+ git (1.2.5)
7
+ jeweler (1.6.4)
8
+ bundler (~> 1.0)
9
+ git (>= 1.2.5)
10
+ rake
11
+ rake (0.9.2)
12
+ rcov (0.9.10)
13
+ rspec (2.3.0)
14
+ rspec-core (~> 2.3.0)
15
+ rspec-expectations (~> 2.3.0)
16
+ rspec-mocks (~> 2.3.0)
17
+ rspec-core (2.3.1)
18
+ rspec-expectations (2.3.0)
19
+ diff-lcs (~> 1.1.2)
20
+ rspec-mocks (2.3.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bio (>= 1.4.2)
27
+ bundler (~> 1.0.0)
28
+ jeweler (~> 1.6.4)
29
+ rcov
30
+ rspec (~> 2.3.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,9 @@
1
+ The Ruby License
2
+
3
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
4
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
5
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
6
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
7
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
9
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,58 @@
1
+ = bio-dbla-classifier
2
+
3
+ DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
4
+ certain sequence motifs within the tag region (Bull et al. 2007). The Dbla class adds methods for grouping DBL-alpha amino acid sequence tags.
5
+ The Dbla class is a subclass of Bio::Sequence::AA. If you apply this method, please cite this article:
6
+ Bull et al. 2007. “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
7
+
8
+ = Installation
9
+
10
+ gem install bio-dbla-classifier
11
+
12
+ = Uninstall
13
+
14
+ gem uninstall bio-dbla-classifier
15
+
16
+ = Usage
17
+ require 'bio'
18
+ require 'bio-dbla-classifier'
19
+
20
+ #create an instance of a new DBL-alpha tag. A Dbla tag extends the Bio::Sequence::AA class with methods
21
+ #to classify and describe Dbla properties
22
+ seq = Dbla.new('DIGDIVRGRDMFKSNDDVEKGLKVVFKKIYKSLPSPAKSHYADHDKSGNYYKLREHWWIVNRKQLWEAITCIAPRDAHYFLKSSPDFKSFSDRKCGHYEGAPPTYLDYVPQYLR')
23
+
24
+ #get the positions of limited variability
25
+ puts seq.polv1
26
+ puts seq.polv2
27
+ puts seq.polv3
28
+ puts seq.polv4
29
+
30
+ #get the distinct sequence identifier
31
+ puts seq.dsid
32
+
33
+ #get the cyspolv group for this tag
34
+ puts seq.cyspolv_group
35
+
36
+ #get the number of cysteines in the tag
37
+ puts seq.cys_count
38
+
39
+ #get the block sharing group for this tag (not yet implemented)
40
+ #puts seq.bs_group
41
+
42
+ #get the length of the tag
43
+ #puts seq.size
44
+
45
+ #if input file is a fasta file
46
+ seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
47
+
48
+ #read the file
49
+ Bio::FlatFile.open(seq_file).each do |entry|
50
+ puts entry.definition
51
+ tag = Dbla.new(entry.seq)
52
+ puts tag.dsid
53
+ puts tag.cyspolv_group
54
+ end
55
+ = Copyright
56
+
57
+ See LICENSE.txt for further details.
58
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-dbla-classifier"
18
+ gem.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
19
+ gem.license = "Ruby"
20
+ gem.summary = %Q{Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach}
21
+ gem.description = %Q{A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2005}
22
+ gem.email = "georgkam@gmail.com"
23
+ gem.authors = ["George Githinji"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rake/rdoctask'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "bio-dbla-classifier #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.1
@@ -0,0 +1,65 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bio-dbla-classifier"
8
+ s.version = "0.2.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["George Githinji"]
12
+ s.date = "2011-09-30"
13
+ s.description = "A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2005"
14
+ s.email = "georgkam@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bio-dbla-classifier.gemspec",
29
+ "lib/bio-dbla-classifier.rb",
30
+ "lib/bio/sequence/aa/dbla.rb",
31
+ "spec/bio-dbla-classifier_spec.rb",
32
+ "spec/dbla_spec.rb",
33
+ "spec/spec_helper.rb"
34
+ ]
35
+ s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
36
+ s.licenses = ["Ruby"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = "1.8.10"
39
+ s.summary = "Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach"
40
+
41
+ if s.respond_to? :specification_version then
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
46
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
47
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
48
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
49
+ s.add_development_dependency(%q<rcov>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
52
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
53
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
54
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
55
+ s.add_dependency(%q<rcov>, [">= 0"])
56
+ end
57
+ else
58
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
59
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
62
+ s.add_dependency(%q<rcov>, [">= 0"])
63
+ end
64
+ end
65
+
@@ -0,0 +1,2 @@
1
+ require 'bio'
2
+ require File.join(File.expand_path(File.dirname(__FILE__)), 'bio','sequence/aa/dbla')
@@ -0,0 +1,109 @@
1
+ class Dbla < Bio::Sequence::AA
2
+
3
+ def dsid
4
+ "#{polv1}-#{polv2}-#{polv3}-#{cys_count.to_s}-#{polv4}-#{self.length}"
5
+ end
6
+
7
+ def ww_pos
8
+ rindex("WW")
9
+ end
10
+
11
+ def vw_pos
12
+ rindex("VW")
13
+ end
14
+
15
+ #number of cysteines
16
+ def cys_count
17
+ scan(/C/).size
18
+ end
19
+
20
+ #The first position of limited variability(polv1)
21
+ def polv1
22
+ self[10,4]
23
+ end
24
+
25
+ #The second position of limited variability(polv2)
26
+ def polv2
27
+ if self =~ /WW/
28
+ polv2 = self[ww_pos - 4,4]
29
+ elsif self =~ /VW/
30
+ polv2 = self[vw_pos - 12,4]
31
+ else
32
+ end
33
+ polv2
34
+ end
35
+
36
+ #The third position of limited variability(polv3)
37
+ def polv3
38
+ if self =~ /WW/
39
+ polv3 = self[ww_pos + 10,4]
40
+ elsif self =~ /VW/
41
+ polv3 = self[vw_pos + 2,4]
42
+ else
43
+ end
44
+ polv3
45
+ end
46
+
47
+ #The fourth position of limited variability(polv4)
48
+ def polv4
49
+ self[self.length - 12,4]
50
+ end
51
+
52
+ #Assigns the cyspolv group based on the cysteine count and the presence of
53
+ #the REY motif in polv2 or the MFK motif in polv1.
54
+ def cyspolv_group
55
+ case
56
+ when cys_count > 4 || cys_count == 3 || cys_count < 2
57
+ group = 6
58
+ when cys_count == 4 && polv2 =~ /REY/i
59
+ group = 5
60
+ when cys_count == 4
61
+ group = 4
62
+ when cys_count == 2 && polv1 =~ /MFK/i
63
+ group = 1
64
+ when cys_count == 2 && polv2 =~ /REY/i
65
+ group =2
66
+ else
67
+ group = 3
68
+ end
69
+ group
70
+ end
71
+ end
72
+
73
+
74
+ #create an instance of a new DBL-alpha tag. A Dbla tag extends the Bio::Sequence::AA class with methods
75
+ #to classify and describe Dbla properties
76
+
77
+ #seq1 = 'DIGDIIRGRDLYSGNNKEKEQRKKLEKNGKTIVGKIYNEATNGQALQARYKGDDNNNYSKLREDRWTANRATIWEAITCDDDNKLSNASYVRPTSTDGQSGAQGKDKCRSANKTTGNTGDVNIVPTYFDYVPQYLR'
78
+ #seq = Dbla.new(seq1)
79
+
80
+ #get the positions of limited variability
81
+ #puts seq.polv1
82
+ #puts seq.polv2
83
+ #puts seq.polv3
84
+ #puts seq.polv4
85
+
86
+ #get the number of cysteines in the tag
87
+ #puts seq.cys_count
88
+
89
+ #get the distinct sequence identifier
90
+ #puts seq.dsid
91
+
92
+ #get the cyspolv group for this tag
93
+ #puts seq.cyspolv_group
94
+
95
+
96
+ #get the block sharing group for this tag
97
+ #puts seq.bs_group #to be implemented
98
+
99
+ #get the length of the tag
100
+ #puts seq.size
101
+
102
+ #if input file is a fasta file
103
+ #seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
104
+
105
+ #read the file
106
+ #Bio::FlatFile.open(seq_file).each do |entry|
107
+ #tag = Dbla.new(entry.seq)
108
+ #puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
109
+ #end
@@ -0,0 +1,4 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "BioDblaClassifier" do
4
+ end
data/spec/dbla_spec.rb ADDED
@@ -0,0 +1,28 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Dbla" do
4
+ context 'a group4 Dbla tag' do
5
+ before(:each) do
6
+ seq = 'YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR'
7
+ @tag = Dbla.new(seq)
8
+ end
9
+
10
+ it "should return the number of cysteines" do
11
+ @tag.cys_count.should == 4
12
+ end
13
+
14
+ it 'should return a dsid' do
15
+ @tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
16
+ end
17
+
18
+ it 'should return the cyspolv group' do
19
+ @tag.cyspolv_group.should == 4
20
+ end
21
+
22
+ it 'should return the length' do
23
+ @tag.length.should == 126
24
+ end
25
+
26
+ end
27
+ end
28
+
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'bio-dbla-classifier'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-dbla-classifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - George Githinji
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-30 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio
16
+ requirement: &2166374180 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.4.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2166374180
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2166373480 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 2.3.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2166373480
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &2166372700 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2166372700
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &2166372220 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2166372220
58
+ - !ruby/object:Gem::Dependency
59
+ name: rcov
60
+ requirement: &2166371500 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2166371500
69
+ description: A classification system for DBL-alpha sequence tags using the CysPolv
70
+ approach described by Bull et al 2005
71
+ email: georgkam@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - LICENSE.txt
76
+ - README.rdoc
77
+ files:
78
+ - .document
79
+ - .rspec
80
+ - Gemfile
81
+ - Gemfile.lock
82
+ - LICENSE.txt
83
+ - README.rdoc
84
+ - Rakefile
85
+ - VERSION
86
+ - bio-dbla-classifier.gemspec
87
+ - lib/bio-dbla-classifier.rb
88
+ - lib/bio/sequence/aa/dbla.rb
89
+ - spec/bio-dbla-classifier_spec.rb
90
+ - spec/dbla_spec.rb
91
+ - spec/spec_helper.rb
92
+ homepage: http://github.com/georgeG/bioruby-dbla-classifier
93
+ licenses:
94
+ - Ruby
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ none: false
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ segments:
106
+ - 0
107
+ hash: -1391751957910863879
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 1.8.10
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach
120
+ test_files: []