bio-dbla-classifier 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Runtime dependency: BioRuby provides Bio::Sequence::AA, which Dbla subclasses.
gem 'bio', '>= 1.4.2'

# Needed to run rake, tests, features
group :development do
  gem 'rspec', '~> 2.3.0'
  gem 'bundler', '~> 1.0.0'
  gem 'jeweler', '~> 1.6.4'
  gem 'rcov', '>= 0'
end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ bio (1.4.2)
5
+ diff-lcs (1.1.3)
6
+ git (1.2.5)
7
+ jeweler (1.6.4)
8
+ bundler (~> 1.0)
9
+ git (>= 1.2.5)
10
+ rake
11
+ rake (0.9.2)
12
+ rcov (0.9.10)
13
+ rspec (2.3.0)
14
+ rspec-core (~> 2.3.0)
15
+ rspec-expectations (~> 2.3.0)
16
+ rspec-mocks (~> 2.3.0)
17
+ rspec-core (2.3.1)
18
+ rspec-expectations (2.3.0)
19
+ diff-lcs (~> 1.1.2)
20
+ rspec-mocks (2.3.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bio (>= 1.4.2)
27
+ bundler (~> 1.0.0)
28
+ jeweler (~> 1.6.4)
29
+ rcov
30
+ rspec (~> 2.3.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,9 @@
1
+ The Ruby License
2
+
3
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
4
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
5
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
6
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
7
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
9
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,58 @@
1
+ = bio-dbla-classifier
2
+
3
+ DBL-alpha tags can be classified into six expression groups depending on the number of cysteines and presence of
4
+ certain sequence motifs within the tag region (Bull et al 2007). Dbla adds methods for grouping DBL-alpha amino acid sequence tags.
5
+ The Dbla class is a subclass of Bio::Sequence::AA. If you apply this method, please cite this article:
6
+ Bull et al. “An approach to classifying sequence tags sampled from Plasmodium falciparum var genes.” Molecular and Biochemical Parasitology 154 (1) (July 2007): 98–102. doi:10.1016/j.molbiopara.2007.03.011.
7
+
8
+ = Installation
9
+
10
+ gem install bio-dbla-classifier
11
+
12
+ = Uninstall
13
+
14
+ gem uninstall bio-dbla-classifier
15
+
16
+ = Usage
17
+ require 'bio'
18
+ require 'bio-dbla-classifier'
19
+
20
+ #create an instance of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
21
+ #to classify and describe Dbla properties
22
+ seq = Dbla.new('DIGDIVRGRDMFKSNDDVEKGLKVVFKKIYKSLPSPAKSHYADHDKSGNYYKLREHWWIVNRKQLWEAITCIAPRDAHYFLKSSPDFKSFSDRKCGHYEGAPPTYLDYVPQYLR')
23
+
24
+ #get the positions of limited variability
25
+ puts seq.polv1
26
+ puts seq.polv2
27
+ puts seq.polv3
28
+ puts seq.polv4
29
+
30
+ #get the distinct sequence identifier
31
+ puts seq.dsid
32
+
33
+ #get the cyspolv group for this tag
34
+ puts seq.cyspolv_group
35
+
36
+ #get the number of cysteines in the tag
37
+ puts seq.cys_count
38
+
39
+ #get the block sharing group for this tag
40
+ #puts seq.bs_group
41
+
42
+ #get the length of the tag
43
+ #puts seq.size
44
+
45
+ #if input file is a fasta file
46
+ seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
47
+
48
+ #read the file
49
+ Bio::FlatFile.open(seq_file).each do |entry|
50
+ puts entry.definition
51
+ tag = Dbla.new(entry.seq)
52
+ puts tag.dsid
53
+ puts tag.cyspolv_group
54
+ end
55
+ = Copyright
56
+
57
+ See LICENSE.txt for further details.
58
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build script: wires up the jeweler packaging tasks, the RSpec test tasks
# (with and without rcov coverage) and the rdoc generation task.

require 'rubygems'
require 'bundler'
begin
  # Activate both the runtime and the development gems declared in the Gemfile.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the Bundler failure and exit with the status code Bundler supplies.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-dbla-classifier"
  gem.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
  gem.license = "Ruby"
  gem.summary = %Q{Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach}
  gem.description = %Q{A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2005}
  gem.email = "georgkam@gmail.com"
  gem.authors = ["George Githinji"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip surrounding whitespace so that a trailing newline in the VERSION file
  # does not end up inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-dbla-classifier #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.1
@@ -0,0 +1,65 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bio-dbla-classifier"
8
+ s.version = "0.2.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["George Githinji"]
12
+ s.date = "2011-09-30"
13
+ s.description = "A classification system for DBL-alpha sequence tags using the CysPolv approach described by Bull et al 2005"
14
+ s.email = "georgkam@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bio-dbla-classifier.gemspec",
29
+ "lib/bio-dbla-classifier.rb",
30
+ "lib/bio/sequence/aa/dbla.rb",
31
+ "spec/bio-dbla-classifier_spec.rb",
32
+ "spec/dbla_spec.rb",
33
+ "spec/spec_helper.rb"
34
+ ]
35
+ s.homepage = "http://github.com/georgeG/bioruby-dbla-classifier"
36
+ s.licenses = ["Ruby"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = "1.8.10"
39
+ s.summary = "Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach"
40
+
41
+ if s.respond_to? :specification_version then
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
46
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
47
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
48
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
49
+ s.add_development_dependency(%q<rcov>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
52
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
53
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
54
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
55
+ s.add_dependency(%q<rcov>, [">= 0"])
56
+ end
57
+ else
58
+ s.add_dependency(%q<bio>, [">= 1.4.2"])
59
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
62
+ s.add_dependency(%q<rcov>, [">= 0"])
63
+ end
64
+ end
65
+
@@ -0,0 +1,2 @@
1
# Gem entry point: load BioRuby first, then the Dbla class that lives in this
# gem's own lib directory (resolved relative to this file).
require 'bio'

lib_dir = File.expand_path(File.dirname(__FILE__))
require File.join(lib_dir, 'bio', 'sequence/aa/dbla')
@@ -0,0 +1,109 @@
1
# A DBL-alpha amino acid sequence tag.
#
# Extends Bio::Sequence::AA with methods that extract the four positions of
# limited variability (PoLV), count cysteines, build the distinct sequence
# identifier (DSID) and assign the tag to one of six cys/PoLV groups.
class Dbla < Bio::Sequence::AA
  # Number of residues in every PoLV window.
  POLV_LENGTH = 4

  # Distinct sequence identifier, formatted as
  # "polv1-polv2-polv3-<cysteine count>-polv4-<tag length>".
  # A PoLV that cannot be located contributes an empty field.
  def dsid
    "#{polv1}-#{polv2}-#{polv3}-#{cys_count}-#{polv4}-#{length}"
  end

  # Index of the last "WW" motif in the tag, or nil when there is none.
  def ww_pos
    rindex("WW")
  end

  # Index of the last "VW" motif in the tag, or nil when there is none.
  def vw_pos
    rindex("VW")
  end

  #number of cysteines
  def cys_count
    scan(/C/).size
  end

  #The first position of limited variability(polv1):
  #the four residues starting at index 10.
  def polv1
    self[10, POLV_LENGTH]
  end

  #The second position of limited variability(polv2):
  #the four residues immediately before the WW motif, or, when the tag has no
  #WW motif, the four residues starting 12 positions before the VW motif.
  #Returns nil when neither motif is present or the window would begin
  #before the start of the tag.
  def polv2
    motif_anchored_polv(-4, -12)
  end

  #The third position of limited variability(polv3):
  #the four residues starting 10 positions after the start of the WW motif,
  #or, when the tag has no WW motif, 2 positions after the start of the VW
  #motif. Returns nil when neither motif is present.
  def polv3
    motif_anchored_polv(10, 2)
  end

  #The fourth position of limited variability(polv4):
  #the four residues starting 12 positions before the end of the tag.
  #Returns nil for tags shorter than 12 residues.
  def polv4
    polv_window(length - 12)
  end

  #Assigning dsid group based on cysteines count and presence of
  #REY motif in polv2, MFK in polv1:
  #  6 - any cysteine count other than 2 or 4
  #  5 - 4 cysteines, REY in polv2
  #  4 - 4 cysteines, no REY in polv2
  #  1 - 2 cysteines, MFK in polv1
  #  2 - 2 cysteines, REY in polv2 (and no MFK in polv1)
  #  3 - 2 cysteines, neither motif
  def cyspolv_group
    cysteines = cys_count
    return 6 unless cysteines == 2 || cysteines == 4

    # nil =~ regexp is nil, so a missing PoLV simply counts as "motif absent".
    has_rey = polv2 =~ /REY/i
    if cysteines == 4
      has_rey ? 5 : 4
    elsif polv1 =~ /MFK/i
      1
    elsif has_rey
      2
    else
      3
    end
  end

  private

  # Four-residue window starting at +start+. Returns nil when +start+ is nil
  # or negative: a negative start would otherwise make String#[] count from
  # the end of the tag and silently return residues from the wrong region.
  def polv_window(start)
    return nil if start.nil? || start < 0
    self[start, POLV_LENGTH]
  end

  # PoLV window located relative to the last WW motif (start shifted by
  # +ww_offset+), falling back to the last VW motif (start shifted by
  # +vw_offset+) when the tag has no WW. Returns nil when neither is present.
  def motif_anchored_polv(ww_offset, vw_offset)
    if (anchor = ww_pos)
      polv_window(anchor + ww_offset)
    elsif (anchor = vw_pos)
      polv_window(anchor + vw_offset)
    end
  end
end
72
+
73
+
74
+ #create an instance of a new DBL-alpha tag. A dbla tag extends the Bio::Sequence::AA class with methods
75
+ #to classify and describe Dbla properties
76
+
77
+ #seq1 = 'DIGDIIRGRDLYSGNNKEKEQRKKLEKNGKTIVGKIYNEATNGQALQARYKGDDNNNYSKLREDRWTANRATIWEAITCDDDNKLSNASYVRPTSTDGQSGAQGKDKCRSANKTTGNTGDVNIVPTYFDYVPQYLR'
78
+ #seq = Dbla.new(seq1)
79
+
80
+ #get the positions of limited variability
81
+ #puts seq.polv1
82
+ #puts seq.polv2
83
+ #puts seq.polv3
84
+ #puts seq.polv4
85
+
86
+ #get the number if cysteines in the tag
87
+ #puts seq.cys_count
88
+
89
+ #get the distinct sequence identifier
90
+ #puts seq.dsid
91
+
92
+ #get the cyspolv group for this tag
93
+ #puts seq.cyspolv_group
94
+
95
+
96
+ #get the block sharing group for this tag
97
+ #puts seq.bs_group #to be implemented
98
+
99
+ #get the length of the tag
100
+ #puts seq.size
101
+
102
+ #if input file is a fasta file
103
+ #seq_file = "#{ENV['HOME']}/sequences/878_kilifi_sequences.fasta"
104
+
105
+ #read the file
106
+ #Bio::FlatFile.open(seq_file).each do |entry|
107
+ #tag = Dbla.new(entry.seq)
108
+ #puts "#{entry.definition},#{tag.dsid},#{tag.cys_count},#{tag.cyspolv_group}"
109
+ #end
@@ -0,0 +1,4 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Example group for the gem as a whole; it currently contains no examples.
describe "BioDblaClassifier" do
end
data/spec/dbla_spec.rb ADDED
@@ -0,0 +1,28 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Checks cysteine count, DSID, cys/PoLV group and length for one known tag
# that has four cysteines.
describe "Dbla" do
  context 'a group4 Dbla tag' do
    # A fresh tag is built for every example that references it.
    let(:tag) do
      Dbla.new('YIGDIIRGRDLYLVNPQEKEQRDKLEENLKKIFKKIHDDVMKTSGRTNGAKARYGGDENFFKLREDWWTANRSTVWKAITCGTHDGASYFRATCSDGQSGAQAKNKCTCNNGDVPTYFDYVPQFLR')
    end

    it "should return the number of cysteines" do
      tag.cys_count.should == 4
    end

    it 'should return a dsid' do
      tag.dsid.should == 'LYLV-LRED-KAIT-4-PTYF-126'
    end

    it 'should return the cyspolv group' do
      tag.cyspolv_group.should == 4
    end

    it 'should return the length' do
      tag.length.should == 126
    end
  end
end
28
+
@@ -0,0 +1,12 @@
1
# Shared spec bootstrap: puts the gem's lib directory and the spec directory
# at the front of the load path, then loads RSpec and the gem itself.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'bio-dbla-classifier'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each { |support_file| require support_file }

RSpec.configure do |config|
end
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-dbla-classifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - George Githinji
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-30 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio
16
+ requirement: &2166374180 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.4.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2166374180
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2166373480 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 2.3.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2166373480
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &2166372700 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2166372700
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &2166372220 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2166372220
58
+ - !ruby/object:Gem::Dependency
59
+ name: rcov
60
+ requirement: &2166371500 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2166371500
69
+ description: A classification system for DBL-alpha sequence tags using the CysPolv
70
+ approach described by Bull et al 2005
71
+ email: georgkam@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - LICENSE.txt
76
+ - README.rdoc
77
+ files:
78
+ - .document
79
+ - .rspec
80
+ - Gemfile
81
+ - Gemfile.lock
82
+ - LICENSE.txt
83
+ - README.rdoc
84
+ - Rakefile
85
+ - VERSION
86
+ - bio-dbla-classifier.gemspec
87
+ - lib/bio-dbla-classifier.rb
88
+ - lib/bio/sequence/aa/dbla.rb
89
+ - spec/bio-dbla-classifier_spec.rb
90
+ - spec/dbla_spec.rb
91
+ - spec/spec_helper.rb
92
+ homepage: http://github.com/georgeG/bioruby-dbla-classifier
93
+ licenses:
94
+ - Ruby
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ none: false
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ segments:
106
+ - 0
107
+ hash: -1391751957910863879
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 1.8.10
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: Classify PfEMP1 DBL-alpha tags using the cyspolv grouping approach
120
+ test_files: []