bio-sambamba 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/.document +5 -0
  2. data/.travis.yml +12 -0
  3. data/Gemfile +11 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.md +68 -0
  6. data/Rakefile +47 -0
  7. data/VERSION +1 -0
  8. data/features/iterate-alignments.feature +40 -0
  9. data/features/random-access.feature +10 -0
  10. data/features/sam-header.feature +23 -0
  11. data/features/step_definitions/iterate-alignments_steps.rb +83 -0
  12. data/features/step_definitions/random-access_steps.rb +22 -0
  13. data/features/step_definitions/sam-header_steps.rb +56 -0
  14. data/features/step_definitions/validation-steps.rb +34 -0
  15. data/features/support/env.rb +13 -0
  16. data/features/syntax-sugar.feature +17 -0
  17. data/features/validation.feature +16 -0
  18. data/lib/bio-sambamba.rb +8 -0
  19. data/lib/bio-sambamba/alignment.rb +131 -0
  20. data/lib/bio-sambamba/alignmentiterator.rb +45 -0
  21. data/lib/bio-sambamba/bamfile.rb +45 -0
  22. data/lib/bio-sambamba/samfile.rb +25 -0
  23. data/lib/bio-sambamba/samheader.rb +194 -0
  24. data/test/data/bins.bam +0 -0
  25. data/test/data/bins.bam.bai +0 -0
  26. data/test/data/c1215_fixmate.bam +0 -0
  27. data/test/data/corrupted_zlib_archive.bam +0 -0
  28. data/test/data/duplicated_block_size.bam +0 -0
  29. data/test/data/ex1_header.bam +0 -0
  30. data/test/data/ex1_header.bam.bai +0 -0
  31. data/test/data/ex1_header.sam +3273 -0
  32. data/test/data/ex1_header.uncompressed.bam +0 -0
  33. data/test/data/no_block_size.bam +0 -0
  34. data/test/data/tags.bam +0 -0
  35. data/test/data/tags.bam.bai +0 -0
  36. data/test/data/wrong_bc_subfield_length.bam +0 -0
  37. data/test/data/wrong_extra_gzip_length.bam +0 -0
  38. metadata +184 -0
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/*.feature
5
+ LICENSE.txt
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "bio", "~> 1.4.2"
4
+ gem "oj", "~> 1.2.9"
5
+
6
+ group :development do
7
+ gem "bundler", "~> 1.1.4"
8
+ gem "jeweler", "~> 1.8.3"
9
+ gem "rspec", "~> 2.7.0"
10
+ gem "cucumber", "~> 1.2.0"
11
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Artem Tarasov
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # bio-sambamba
2
+
3
+ [![Build Status](https://secure.travis-ci.org/lomereiter/bioruby-sambamba.png)](http://travis-ci.org/lomereiter/bioruby-sambamba)
4
+
5
+ Full description goes here
6
+
7
+ Note: this software is under active development!
8
+
9
+ ## Installation
10
+
11
+ ```sh
12
+ rake build
13
+ rake install
14
+ ```
15
+
16
+ In order to use the gem, you also need the <code>sambamba</code> tool installed.
17
+ For that, do the following:
18
+
19
+ * install [DMD compiler](http://dlang.org/download.html)
20
+ * install [Ragel](http://www.complang.org/ragel/) finite state machine compiler
21
+ * clone sambamba repository and compile the tool
22
+
23
+ ```sh
24
+ git clone https://github.com/lomereiter/sambamba.git
25
+ cd sambamba/CLItools/
26
+ make
27
+ ```
28
+
29
+ * place the executable file <code>build/sambamba</code> somewhere in your $PATH,
30
+ for example:
31
+
32
+ ```sh
33
+ cp build/sambamba /usr/local/bin
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ ```ruby
39
+ require 'bio-sambamba'
40
+ ```
41
+
42
+ The API doc is online. For more code examples see the test files in
43
+ the source tree.
44
+
45
+ ## Project home page
46
+
47
+ For information on the source tree, documentation, examples, issues and
48
+ how to contribute, see
49
+
50
+ http://github.com/lomereiter/bioruby-sambamba
51
+
52
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
53
+
54
+ ## Cite
55
+
56
+ If you use this software, please cite one of
57
+
58
+ * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
59
+ * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
60
+
61
+ ## Biogems.info
62
+
63
+ This Biogem is published at [#bio-sambamba](http://biogems.info/index.html)
64
+
65
+ ## Copyright
66
+
67
+ Copyright (c) 2012 Artem Tarasov. See LICENSE.txt for further details.
68
+
data/Rakefile ADDED
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-sambamba"
18
+ gem.homepage = "http://github.com/lomereiter/bioruby-sambamba"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Ruby wrapper for Sambamba tool}
21
+ gem.description = %Q{New Sambamba library comes with a command-line tool for working with SAM/BAM files. This gem brings some of its functionality to Ruby.}
22
+ gem.email = "lomereiter@gmail.com"
23
+ gem.authors = ["Artem Tarasov"]
24
+ # dependencies defined in Gemfile
25
+
26
+ gem.files.include "lib/bio-sambamba/*.rb"
27
+ gem.files.include "lib/bio-sambamba.rb"
28
+ end
29
+ Jeweler::RubygemsDotOrgTasks.new
30
+
31
+ require 'cucumber/rake/task'
32
+ Cucumber::Rake::Task.new do |features|
33
+ end
34
+
35
+ task :test => :cucumber
36
+
37
+ task :default => :test
38
+
39
+ require 'rdoc/task'
40
+ Rake::RDocTask.new do |rdoc|
41
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
42
+
43
+ rdoc.rdoc_dir = 'rdoc'
44
+ rdoc.title = "bio-sambamba #{version}"
45
+ rdoc.rdoc_files.include('README*')
46
+ rdoc.rdoc_files.include('lib/**/*.rb')
47
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,40 @@
1
+ Feature: iterating alignment records
2
+
3
+ In order to have access to all information contained in a BAM file,
4
+ As a bioinformatician,
5
+ I want to be able to iterate alignment records from Ruby
6
+ And have access to all their fields and tags.
7
+
8
+ Scenario: accessing alignment records
9
+ Given I opened a valid BAM file
10
+ When I use its 'alignments' method
11
+ Then I should be able to iterate the returned object with 'each'
12
+ And the objects which I iterate over should represent the alignments
13
+ And I should be able to access all fields mentioned in SAM/BAM format specification
14
+
15
+ Scenario: access existing alignment tag
16
+ Given I have an alignment
17
+ And it contains some tags
18
+ When I access it like a hash
19
+ And I use 2-character string as a key
20
+ And the alignment has such tag
21
+ Then I should be able to see corresponding value
22
+ And it should be a simple Ruby object (Array, Numeric, or String)
23
+
24
+ Scenario: invalid tag key (not of length 2)
25
+ Given I have an alignment
26
+ When I access it like a hash
27
+ But I use string of length different than two, as a key,
28
+ Then exception should be thrown.
29
+
30
+ Scenario: accessing non-existing alignment tag
31
+ Given I have an alignment
32
+ And it contains some tags
33
+ When I access it like a hash
34
+ But it doesn't contain the requested tag
35
+ Then nil should be returned.
36
+
37
+ Scenario: fetching all tags as a hash
38
+ Given I have an alignment
39
+ When I use its 'tags' method
40
+ Then I should be able to work with the returned object just like with Hash
@@ -0,0 +1,10 @@
1
+ Feature: random access to BAM file
2
+ In order to retrieve information about specific regions,
3
+ I want to be able to quickly fetch alignments overlapping a region.
4
+
5
+ Scenario: fetching alignments
6
+ Given I have a BAM file
7
+ And it's sorted by coordinate
8
+ And I have its index as well
9
+ When I specify reference sequence and region (1-based beginning and end positions)
10
+ Then I should be able to immediately have access to alignments overlapping it
@@ -0,0 +1,23 @@
1
+ Feature: access to information from SAM header
2
+
3
+ In order to work with BAM file,
4
+ I want to see what its header contains.
5
+
6
+ Background:
7
+ Given I opened a valid BAM file
8
+ And it contains SAM header
9
+
10
+ Scenario: getting raw text
11
+ When I call 'header' method
12
+ Then I should see text of SAM header
13
+
14
+ Scenario: accessing version and sorting order
15
+ When SAM header contains @HD line
16
+ Then I should be able to see format version
17
+ And I should be able to see sorting order
18
+
19
+ Scenario: getting information about reference sequences
20
+ When SAM header contains @SQ lines
21
+ Then I should be able to iterate them
22
+ And I should be able to see sequence names
23
+ And I should be able to see their lengths
@@ -0,0 +1,83 @@
1
+ Before do
2
+ @bam = Bio::Bam::File.new 'test/data/ex1_header.bam'
3
+ end
4
+
5
+ When /^I use its 'alignments' method$/ do
6
+ @bam.should respond_to(:alignments)
7
+ end
8
+
9
+ Then /^I should be able to iterate the returned object with 'each'$/ do
10
+ @bam.alignments.should respond_to(:each)
11
+ end
12
+
13
+ Then /^the objects which I iterate over should represent the alignments$/ do
14
+ @bam.alignments.take(100).each do |read|
15
+ read.should be_instance_of(Bio::Bam::Alignment)
16
+ end
17
+ end
18
+
19
+ Then /^I should be able to access all fields mentioned in SAM\/BAM format specification$/ do
20
+ @read = @bam.alignments.first
21
+ @read.read_name.should == 'EAS56_57:6:190:289:82'
22
+ @read.sequence.should == 'CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA'
23
+ @read.position.should == 100
24
+ @read.flag.should == 69
25
+ @read.mapping_quality.should == 0
26
+ @read.cigar_string.should == '*'
27
+ @read.reference.should == 'chr1'
28
+ @read.quality.should == [27, 27, 27, 22, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 23, 26, 26, 27, 22, 26, 19, 27, 26, 27, 26, 26, 26, 26, 26, 24, 19, 27, 26]
29
+ end
30
+
31
+ Given /^I have an alignment$/ do
32
+ @read = @bam.alignments.first
33
+ end
34
+
35
+ Given /^it contains some tags$/ do
36
+ end
37
+
38
+ When /^I access it like a hash$/ do
39
+ @read.should respond_to(:[])
40
+ end
41
+
42
+ When /^I use 2-character string as a key$/ do
43
+ @key = 'MF'
44
+ end
45
+
46
+ When /^the alignment has such tag$/ do
47
+ @read[@key].should_not be_nil
48
+ end
49
+
50
+ Then /^I should be able to see corresponding value$/ do
51
+ @read[@key].should be == 192
52
+ end
53
+
54
+ Then /^it should be a simple Ruby object \(Array, Numeric, or String\)$/ do
55
+ @read[@key].should be_kind_of Numeric
56
+ end
57
+
58
+ When /^I use string of length different than two, as a key,$/ do
59
+ @key = 'key'
60
+ end
61
+
62
+ Then /^exception should be thrown\.$/ do
63
+ expect{@read[@key]}.to raise_error(RuntimeError)
64
+ end
65
+
66
+ When /^it doesn't contain the requested tag$/ do
67
+ @key = 'hq'
68
+ end
69
+
70
+ Then /^nil should be returned\.$/ do
71
+ @read[@key].should be_nil
72
+ end
73
+
74
+ When /^I use its 'tags' method$/ do
75
+ @tags = @read.tags
76
+ end
77
+
78
+ Then /^I should be able to work with the returned object just like with Hash$/ do
79
+ @tags.should be_kind_of Hash
80
+ @tags['MF'].should be == 192
81
+ @tags.keys.should be == ['MF']
82
+ @tags.values.should be == [192]
83
+ end
@@ -0,0 +1,22 @@
1
+ Before do
2
+ @bam = Bio::Bam::File.new './test/data/ex1_header.bam'
3
+ end
4
+
5
+ Given /^it's sorted by coordinate$/ do
6
+ @bam.header.sorting_order.should == 'coordinate'
7
+ end
8
+
9
+ Given /^I have its index as well$/ do
10
+ @bam.should have_index
11
+ end
12
+
13
+ When /^I specify reference sequence and region \(1-based beginning and end positions\)$/ do
14
+ @region = (1400 ... 1500)
15
+ @chr = "chr2"
16
+ end
17
+
18
+ Then /^I should be able to immediately have access to alignments overlapping it$/ do
19
+ @alignments = @bam.fetch @chr, @region
20
+ @alignments.should respond_to(:each).with(0).arguments
21
+ @alignments.to_a.length.should == 77
22
+ end
@@ -0,0 +1,56 @@
1
+ Given /^I opened a valid BAM file$/ do
2
+ filename = './test/data/ex1_header.bam'
3
+ File.exists?(filename).should be_true
4
+ @bamfile = Bio::Bam::File.new filename
5
+ end
6
+
7
+ Given /^it contains SAM header$/ do
8
+ @bamfile.header.raw_contents.length.should be > 0
9
+ end
10
+
11
+ When /^I call 'header' method$/ do
12
+ @header = @bamfile.header
13
+ end
14
+
15
+ Then /^I should see text of SAM header$/ do
16
+ @header.raw_contents.should be_kind_of String
17
+ end
18
+
19
+ Given /^SAM header contains @HD line$/ do
20
+ @header = @bamfile.header
21
+ @header.raw_contents.should =~ /^@HD/
22
+ end
23
+
24
+ Then /^I should be able to see format version$/ do
25
+ @version = @header.version
26
+ @version.should be_kind_of String
27
+ @version.length.should be > 0
28
+ end
29
+
30
+ Then /^I should be able to see sorting order$/ do
31
+ @sorting_order = @header.sorting_order
32
+ @sorting_order.should be_kind_of String
33
+ @sorting_order.length.should be > 0
34
+ end
35
+
36
+ Given /^SAM header contains @SQ lines$/ do
37
+ @header = @bamfile.header
38
+ @header.sq_lines.length.should be > 0
39
+ end
40
+
41
+ Then /^I should be able to iterate them$/ do
42
+ @sq_lines = @header.sq_lines
43
+ @sq_lines.should be_kind_of Array
44
+ end
45
+
46
+ Then /^I should be able to see sequence names$/ do
47
+ @line = @sq_lines.first
48
+ @line.should respond_to(:sequence_name).with(0).arguments
49
+ @line.sequence_name.should be_kind_of String
50
+ @line.sequence_name.length.should be > 0
51
+ end
52
+
53
+ Then /^I should be able to see their lengths$/ do
54
+ @line.should respond_to(:sequence_length).with(0).arguments
55
+ @line.sequence_length.should be_kind_of Numeric
56
+ end
@@ -0,0 +1,34 @@
1
+ Before do
2
+ # this file is known to contain some invalid records
3
+ @tagsbam = Bio::Bam::File.new './test/data/tags.bam'
4
+ end
5
+
6
+ Given /^I have an alignment from a BAM file$/ do
7
+ @alignment = @tagsbam.alignments.to_a[32]
8
+ end
9
+
10
+ When /^I call 'valid\?' method$/ do
11
+ pending
12
+ # @is_valid = @alignment.valid?
13
+ end
14
+
15
+ Then /^it should return whether it is valid or not$/ do
16
+ @is_valid.should be_true
17
+ end
18
+
19
+ Given /^I have a BAM file$/ do
20
+ end
21
+
22
+ When /^I want to iterate over its records$/ do
23
+ @records = @tagsbam.alignments
24
+ end
25
+
26
+ Then /^I should have an option to skip invalid ones$/ do
27
+ @records.should respond_to(:each_valid).with(0).arguments
28
+ end
29
+
30
+ Then /^all the reads in this case should be valid$/ do
31
+ count = 0
32
+ @records.each_valid {|read| count += 1 }
33
+ count.should == 411
34
+ end