bio-sambamba 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/VERSION +1 -1
- data/features/filtering.feature +74 -0
- data/features/step_definitions/filtering_steps.rb +86 -0
- data/features/step_definitions/iterate-alignments_steps.rb +1 -1
- data/features/step_definitions/syntax-sugar_steps.rb +42 -0
- data/features/syntax-sugar.feature +19 -8
- data/lib/bio-sambamba.rb +3 -0
- data/lib/bio-sambamba/alignment.rb +34 -8
- data/lib/bio-sambamba/alignmentiterator.rb +76 -5
- data/lib/bio-sambamba/bamfile.rb +28 -10
- data/lib/bio-sambamba/basepair.rb +17 -0
- data/lib/bio-sambamba/exception.rb +16 -0
- data/lib/bio-sambamba/filtering.rb +201 -0
- data/lib/bio-sambamba/samfile.rb +4 -1
- data/lib/bio-sambamba/samheader.rb +11 -6
- metadata +108 -89
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# bio-sambamba
|
2
2
|
|
3
|
+
[![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/lomereiter/bioruby-sambamba)
|
3
4
|
[![Build Status](https://secure.travis-ci.org/lomereiter/bioruby-sambamba.png)](http://travis-ci.org/lomereiter/bioruby-sambamba)
|
4
5
|
|
5
6
|
Full description goes here
|
@@ -13,7 +14,7 @@ Note: this software is under active development!
|
|
13
14
|
rake install
|
14
15
|
```
|
15
16
|
|
16
|
-
In order to use the gem, you also need <code>sambamba</code> tool installed.
|
17
|
+
In order to use the gem, you also need the <code>sambamba</code> tool installed (version >= 0.2.4).
|
17
18
|
|
18
19
|
If you use Debian, you can download a package for your architecture from
|
19
20
|
[github downloads](http://github.com/lomereiter/sambamba/downloads).
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.3
|
@@ -0,0 +1,74 @@
|
|
1
|
+
Feature: custom filters for BAM data
|
2
|
+
|
3
|
+
In order to filter alignment records faster,
|
4
|
+
As a user who deals with a lot of data,
|
5
|
+
I want to be able to create custom filters and have them executed
|
6
|
+
not in Ruby but in sambamba view tool.
|
7
|
+
|
8
|
+
Background:
|
9
|
+
Given I have a BAM file
|
10
|
+
And I have an iterator for alignment records in this file
|
11
|
+
|
12
|
+
Scenario: setting filter for alignments
|
13
|
+
When I create a filter with Bio::Bam::filter function
|
14
|
+
Then I should be able to pass this filter to the 'with_filter' method of the iterator
|
15
|
+
And it should give me a enumerator for those alignments which pass the filter
|
16
|
+
|
17
|
+
Scenario Outline: setting conditions for flags
|
18
|
+
When I create a filter with Bio::Bam::filter { flag.<name>.<is_set_or_unset> }
|
19
|
+
Then I should get all alignments where flag called <name> <is_set_or_unset> correspondingly.
|
20
|
+
|
21
|
+
Examples:
|
22
|
+
| name | is_set_or_unset |
|
23
|
+
| proper_pair | is_set |
|
24
|
+
| mate_is_unmapped | is_unset |
|
25
|
+
|
26
|
+
Scenario Outline: setting conditions for integer and string fields
|
27
|
+
When I create a filter with Bio::Bam::filter { <field> <comparison_operator> <value> }
|
28
|
+
Then I should get all alignments where <comparison_operator> for <field> and <value> is true
|
29
|
+
|
30
|
+
Examples:
|
31
|
+
| field | comparison_operator | value |
|
32
|
+
| mate_position | < | 1.Kbp |
|
33
|
+
| mapping_quality | >= | 39 |
|
34
|
+
| read_name | > | 'EAS114_' |
|
35
|
+
|
36
|
+
Scenario Outline: setting conditions for tags
|
37
|
+
When I create a filter with Bio::Bam::filter { tag(:<tagname>) <comparison_operator> <value> }
|
38
|
+
Then I should get all alignments where tag with name <tagname> exists
|
39
|
+
And <comparison_operator> for tag with name <tagname> and <value> is true
|
40
|
+
|
41
|
+
Examples:
|
42
|
+
| tagname | comparison_operator | value |
|
43
|
+
| Aq | == | 72 |
|
44
|
+
| UQ | != | 0 |
|
45
|
+
| NM | >= | 3 |
|
46
|
+
|
47
|
+
Scenario Outline: regex matching for tags and string fields
|
48
|
+
When I create a filter with Bio::Bam::filter { <field_or_tag> =~ <regex> }
|
49
|
+
Then I should get all alignments where <field_or_tag> matches given <regex>
|
50
|
+
|
51
|
+
Examples:
|
52
|
+
| field_or_tag | regex |
|
53
|
+
| read_name | /^B7_(?:\d+):+\d+$/ |
|
54
|
+
| cigar | /[^M\d]/ |
|
55
|
+
|
56
|
+
Scenario Outline: logical operations on filters
|
57
|
+
Given I have several <conditions>
|
58
|
+
When I enclose them by a <n-ary operation> block
|
59
|
+
Then I should get a condition representing <n-ary operation> of those
|
60
|
+
|
61
|
+
Examples:
|
62
|
+
| conditions | n-ary operation |
|
63
|
+
| ["position >= 100.bp", "mate_position <= 200.bp"] | union |
|
64
|
+
| ["tag(:NM) == 3", "tag(:UQ) == 42"] | intersection |
|
65
|
+
|
66
|
+
Scenario Outline: negation of filter
|
67
|
+
Given I have a condition <condition>
|
68
|
+
When I enclose it in 'negate' block
|
69
|
+
Then I should have a condition representing the same alignments as <'not' equivalent>
|
70
|
+
|
71
|
+
Examples:
|
72
|
+
| condition | 'not' equivalent |
|
73
|
+
| flag.paired.is_set | flag.paired.is_unset |
|
74
|
+
| mapping_quality >= 50 | mapping_quality < 50 |
|
@@ -0,0 +1,86 @@
|
|
1
|
+
Given /^I have an iterator for alignment records in this file$/ do
|
2
|
+
@iter = @bam.alignments
|
3
|
+
end
|
4
|
+
|
5
|
+
When /^I create a filter with Bio::Bam::filter function$/ do
|
6
|
+
@filter = Bio::Bam::filter { mapping_quality > 50 }
|
7
|
+
end
|
8
|
+
|
9
|
+
Then /^I should be able to pass this filter to the 'with_filter' method of the iterator$/ do
|
10
|
+
@iter.should respond_to(:with_filter).with(1).argument
|
11
|
+
@alignments = @iter.with_filter @filter
|
12
|
+
end
|
13
|
+
|
14
|
+
Then /^it should give me a enumerator for those alignments which pass the filter$/ do
|
15
|
+
@alignments.each do |read|
|
16
|
+
read.mapping_quality.should > 50
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
When /^I create a filter with (.*?{[^}]*?})\s*$/ do |query|
|
21
|
+
@filter = eval(query)
|
22
|
+
end
|
23
|
+
|
24
|
+
Then /^I should get all alignments where (.*?) for (\w+?) and (.*?) is true$/ do |op, field, val|
|
25
|
+
@bam.alignments.with_filter(@filter).take(100).each do |read|
|
26
|
+
eval("read.#{field} #{op} #{val}").should be true
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
Then /^I should get all alignments where tag with name (\w{2}) exists$/ do |tagname|
|
31
|
+
@bam.alignments.with_filter(@filter).take(100).each do |read|
|
32
|
+
read.tags[tagname].should_not be nil
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
Then /^(.*?) for tag with name (\w{2}) and (.*?) is true$/ do |op, tagname, val|
|
37
|
+
@bam.alignments.with_filter(@filter).take(100).each do |read|
|
38
|
+
eval("read.tags['#{tagname}'] #{op} #{val}").should be true
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
Then /^I should get all alignments where (\w+) matches given (.*?)$/ do |field, regex|
|
43
|
+
@bam.alignments.with_filter(@filter).take(100).each do |read|
|
44
|
+
eval("read.#{field}.should =~ #{regex}")
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
Then /^I should get all alignments where flag called (\w+) (\w+) correspondingly.$/ do |flagname, op|
|
49
|
+
@bam.alignments.with_filter(@filter).take(100).each do |read|
|
50
|
+
read.send(flagname.to_sym).should == (op == 'is_set')
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
Given /^I have several (.*?)$/ do |conditions|
|
55
|
+
@conditions = eval(conditions)
|
56
|
+
@filters = @conditions.map {|condition| Bio::Bam::filter { eval(condition) }}
|
57
|
+
end
|
58
|
+
|
59
|
+
When /^I enclose them by a (\w+) block$/ do |op|
|
60
|
+
all = @conditions.join "\n"
|
61
|
+
@all_filter = Bio::Bam::filter { eval("#{op} { #{all} }") }
|
62
|
+
end
|
63
|
+
|
64
|
+
Then /^I should get a condition representing (\w+) of those$/ do |op|
|
65
|
+
seq_c = @filters.map {|f| @bam.alignments.with_filter(f).map(&:sequence).to_a}
|
66
|
+
seq_f = @bam.alignments.with_filter(@all_filter).map(&:sequence).to_a
|
67
|
+
if op == 'union' then
|
68
|
+
seq_c.reduce(&:|).sort.should == seq_f.uniq.sort
|
69
|
+
elsif op == 'intersection'
|
70
|
+
seq_c.reduce(&:&).sort.should == seq_f.uniq.sort
|
71
|
+
else
|
72
|
+
raise 'unknown op: ' + op
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
Given /^I have a condition (.*?)$/ do |cond|
|
77
|
+
@negation = Bio::Bam::filter { negate { eval(cond) }}
|
78
|
+
end
|
79
|
+
|
80
|
+
When /^I enclose it in 'negate' block$/ do
|
81
|
+
end
|
82
|
+
|
83
|
+
Then /^I should have a condition representing the same alignments as (.*?)$/ do |equiv|
|
84
|
+
@equiv = Bio::Bam::filter { eval(equiv) }
|
85
|
+
@bam.alignments.with_filter(@negation).map(&:read_name).should == @bam.alignments.with_filter(@equiv).map(&:read_name)
|
86
|
+
end
|
@@ -23,7 +23,7 @@ Then /^I should be able to access all fields mentioned in SAM\/BAM format specif
|
|
23
23
|
@read.position.should == 100
|
24
24
|
@read.flag.should == 69
|
25
25
|
@read.mapping_quality.should == 0
|
26
|
-
@read.
|
26
|
+
@read.cigar.should == '*'
|
27
27
|
@read.reference.should == 'chr1'
|
28
28
|
@read.quality.should == [27, 27, 27, 22, 27, 27, 27, 26, 27, 27, 27, 27, 27, 27, 27, 27, 23, 26, 26, 27, 22, 26, 19, 27, 26, 27, 26, 26, 26, 26, 26, 24, 19, 27, 26]
|
29
29
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
Given /^the associated BAI file$/ do
|
2
|
+
@bam.has_index?.should be_true
|
3
|
+
end
|
4
|
+
|
5
|
+
When /^I say "(.*?)"$/ do |expr|
|
6
|
+
bam = @bam
|
7
|
+
@alignments = eval(expr)
|
8
|
+
end
|
9
|
+
|
10
|
+
Then /^I should get an enumerator for alignments$/ do
|
11
|
+
@alignments.first.should be_instance_of Bio::Bam::Alignment
|
12
|
+
end
|
13
|
+
|
14
|
+
Then /^each of them should have reference sequence "(.*?)"$/ do |sequence|
|
15
|
+
@alignments.each do |read|
|
16
|
+
read.reference.should == sequence
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
Then /^each of them should overlap region \[(\d+), (\d+)\] \(1-based\)$/ do |begpos, endpos|
|
21
|
+
@alignments.each do |read|
|
22
|
+
read.position.should <= endpos.to_i
|
23
|
+
(read.position + read.bases_covered).should >= begpos.to_i
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Given /^an alignment iterator$/ do
|
28
|
+
@alignments = @bam.alignments
|
29
|
+
end
|
30
|
+
|
31
|
+
When /^I use 'select' method of this iterator with a block$/ do
|
32
|
+
@selected_reads = @alignments.select { read_name =~ /^EAS2/ }
|
33
|
+
end
|
34
|
+
|
35
|
+
Then /^it should be the same as calling Bio::Bam::filter with this block$/ do
|
36
|
+
@filter = Bio::Bam::filter { read_name =~ /^EAS2/ }
|
37
|
+
end
|
38
|
+
|
39
|
+
Then /^passing it as an argument to the 'with_filter' method$/ do
|
40
|
+
# TODO: make comparison function for alignments
|
41
|
+
@selected_reads.to_a.length.should == @alignments.with_filter(@filter).to_a.length
|
42
|
+
end
|
@@ -4,14 +4,25 @@ Feature: syntax sugar
|
|
4
4
|
As a Rubyista,
|
5
5
|
I want some syntax sugar.
|
6
6
|
|
7
|
-
Scenario:
|
7
|
+
Scenario: human-readable requests
|
8
8
|
Given I have a BAM file
|
9
|
-
And associated BAI file
|
10
|
-
When I say "bam.alignments.referencing(
|
11
|
-
Then I should get
|
9
|
+
And the associated BAI file
|
10
|
+
When I say "bam.alignments.referencing('chr1').overlapping(1.Kbp .. 2.Kbp)"
|
11
|
+
Then I should get an enumerator for alignments
|
12
|
+
And each of them should have reference sequence "chr1"
|
13
|
+
And each of them should overlap region [1000, 2000] (1-based)
|
12
14
|
|
13
|
-
Scenario:
|
15
|
+
Scenario: shortcuts for programmers who are too lazy to type that much
|
14
16
|
Given I have a BAM file
|
15
|
-
And associated BAI file
|
16
|
-
When I say "bam[
|
17
|
-
Then I should get
|
17
|
+
And the associated BAI file
|
18
|
+
When I say "bam['chr1'][500.bp .. 1.Kbp]"
|
19
|
+
Then I should get an enumerator for alignments
|
20
|
+
And each of them should have reference sequence "chr1"
|
21
|
+
And each of them should overlap region [500, 1000] (1-based)
|
22
|
+
|
23
|
+
Scenario: shortcut for filtering
|
24
|
+
Given I have a BAM file
|
25
|
+
And an alignment iterator
|
26
|
+
When I use 'select' method of this iterator with a block
|
27
|
+
Then it should be the same as calling Bio::Bam::filter with this block
|
28
|
+
And passing it as an argument to the 'with_filter' method
|
data/lib/bio-sambamba.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'bio/command'
|
2
2
|
require 'oj'
|
3
3
|
|
4
|
+
require 'bio-sambamba/exception.rb'
|
4
5
|
require 'bio-sambamba/samheader.rb'
|
5
6
|
require 'bio-sambamba/alignment.rb'
|
6
7
|
require 'bio-sambamba/alignmentiterator.rb'
|
7
8
|
require 'bio-sambamba/bamfile.rb'
|
8
9
|
require 'bio-sambamba/samfile.rb'
|
10
|
+
require 'bio-sambamba/basepair.rb'
|
11
|
+
require 'bio-sambamba/filtering.rb'
|
@@ -15,6 +15,10 @@ module Bio
|
|
15
15
|
@json['tags'][tag]
|
16
16
|
end
|
17
17
|
|
18
|
+
def ==(read)
|
19
|
+
read.json == json
|
20
|
+
end
|
21
|
+
|
18
22
|
# Hash of record tags
|
19
23
|
attr_reader :tags if false
|
20
24
|
|
@@ -31,7 +35,7 @@ module Bio
|
|
31
35
|
attr_reader :mapping_quality if false
|
32
36
|
|
33
37
|
# CIGAR string
|
34
|
-
attr_reader :
|
38
|
+
attr_reader :cigar if false
|
35
39
|
|
36
40
|
# Observed template length
|
37
41
|
attr_reader :template_length if false
|
@@ -52,18 +56,36 @@ module Bio
|
|
52
56
|
# 1-based leftmost position of the mate/next segment
|
53
57
|
attr_reader :mate_position if false
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
# The number of reference bases covered
|
60
|
+
def bases_covered
|
61
|
+
return 0 if cigar == '*'
|
62
|
+
cigar.split(/([MIDNSHP=X])/).each_slice(2).reduce(0) {|res, op|
|
63
|
+
res += op[0].to_i unless ('M=XDN'.index op[1]).nil?
|
64
|
+
res
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
{'position' => 'pos',
|
59
69
|
'mapping_quality' => 'mapq',
|
60
|
-
'cigar_string' => 'cigar',
|
61
70
|
'template_length' => 'tlen',
|
62
71
|
'flag' => 'flag',
|
72
|
+
'mate_position' => 'pnext'
|
73
|
+
}.each do |k, v|
|
74
|
+
eval <<-DEFINE_READER
|
75
|
+
def #{k}
|
76
|
+
@json['#{v}'].to_i
|
77
|
+
end
|
78
|
+
DEFINE_READER
|
79
|
+
end
|
80
|
+
|
81
|
+
{'tags' => 'tags',
|
82
|
+
'reference' => 'rname',
|
83
|
+
'read_name' => 'qname',
|
84
|
+
'cigar' => 'cigar',
|
63
85
|
'quality' => 'qual',
|
64
86
|
'sequence' => 'seq',
|
65
|
-
'mate_reference' => 'rnext'
|
66
|
-
|
87
|
+
'mate_reference' => 'rnext'
|
88
|
+
}.each do |k, v|
|
67
89
|
eval <<-DEFINE_READER
|
68
90
|
def #{k}
|
69
91
|
@json['#{v}']
|
@@ -125,6 +147,10 @@ module Bio
|
|
125
147
|
def is_duplicate
|
126
148
|
(flag & 0x400) != 0
|
127
149
|
end
|
150
|
+
|
151
|
+
private
|
152
|
+
attr_accessor :json
|
153
|
+
|
128
154
|
end
|
129
155
|
|
130
156
|
end
|
@@ -4,6 +4,7 @@ module Bio
|
|
4
4
|
# Class for iterating through alignments
|
5
5
|
class AlignmentIterator
|
6
6
|
include Enumerable
|
7
|
+
include SambambaStderrParser
|
7
8
|
|
8
9
|
# Creates a new AlignmentIterator object which will
|
9
10
|
# parse JSON outputted by a specified command.
|
@@ -26,19 +27,89 @@ module Bio
|
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
30
|
+
private
|
31
|
+
|
32
|
+
def get_command
|
33
|
+
command = @command
|
34
|
+
|
35
|
+
if not @chromosome.nil? then
|
36
|
+
if not @region.nil? then
|
37
|
+
command.push "#{@chromosome}:#{@region.min}-#{@region.max}"
|
38
|
+
else
|
39
|
+
command.push "#{@chromosome}"
|
40
|
+
end
|
41
|
+
elsif not @region.nil? then
|
42
|
+
raise 'must specify a reference when doing a region query'
|
43
|
+
end
|
44
|
+
|
45
|
+
command
|
46
|
+
end
|
47
|
+
|
48
|
+
public
|
49
|
+
|
29
50
|
# Iterate through all alignments skipping
|
30
51
|
# validation checks
|
31
52
|
def each
|
32
|
-
|
33
53
|
return enum_for(:each) if not block_given?
|
34
54
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
55
|
+
command = get_command
|
56
|
+
|
57
|
+
Bio::Command.call_command_open3(command) do |pin, pout, perr|
|
58
|
+
pout.each do |line|
|
59
|
+
json = Oj.load(line)
|
60
|
+
yield Bio::Bam::Alignment.new(json)
|
39
61
|
end
|
62
|
+
raise_exception_if_stderr_is_not_empty(perr)
|
40
63
|
end
|
41
64
|
end
|
65
|
+
|
66
|
+
# Set filter for alignments
|
67
|
+
def with_filter(filter)
|
68
|
+
iter = self.clone
|
69
|
+
iter.command.push('-F')
|
70
|
+
iter.command.push(filter.to_s)
|
71
|
+
iter
|
72
|
+
end
|
73
|
+
|
74
|
+
def select(&block)
|
75
|
+
with_filter (Bio::Bam::filter &block)
|
76
|
+
end
|
77
|
+
|
78
|
+
def count
|
79
|
+
command = get_command
|
80
|
+
command.push('-c')
|
81
|
+
Bio::Command.call_command_open3(command) do |pin, pout, perr|
|
82
|
+
raise_exception_if_stderr_is_not_empty(perr)
|
83
|
+
pout.readline.to_i
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def referencing(chr)
|
88
|
+
iter = self.clone
|
89
|
+
iter.chromosome = chr
|
90
|
+
iter
|
91
|
+
end
|
92
|
+
|
93
|
+
def overlapping(reg)
|
94
|
+
iter = self.clone
|
95
|
+
iter.region = reg
|
96
|
+
iter
|
97
|
+
end
|
98
|
+
|
99
|
+
def [](reg)
|
100
|
+
overlapping(reg)
|
101
|
+
end
|
102
|
+
|
103
|
+
def clone
|
104
|
+
iter = AlignmentIterator.new @command
|
105
|
+
iter.chromosome = chromosome
|
106
|
+
iter.region = region
|
107
|
+
iter
|
108
|
+
end
|
109
|
+
|
110
|
+
attr_accessor :chromosome
|
111
|
+
attr_accessor :region
|
112
|
+
attr_accessor :command
|
42
113
|
end
|
43
114
|
|
44
115
|
end
|
data/lib/bio-sambamba/bamfile.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
1
3
|
module Bio
|
2
4
|
|
5
|
+
private
|
6
|
+
RubyFile = File
|
7
|
+
|
8
|
+
public
|
3
9
|
# Module for reading BAM files
|
4
10
|
module Bam
|
5
11
|
|
12
|
+
public
|
13
|
+
|
6
14
|
# Class providing access to BAM files
|
7
15
|
class File
|
8
|
-
|
16
|
+
|
17
|
+
include FileExistenceChecker
|
18
|
+
|
9
19
|
# Creates an object for access to BAM file
|
10
20
|
def initialize(filename)
|
11
21
|
@filename = filename
|
22
|
+
check_file_existence filename
|
12
23
|
end
|
13
24
|
|
14
25
|
# SAM header
|
@@ -18,13 +29,14 @@ module Bio
|
|
18
29
|
|
19
30
|
# Returns an AlignmentIterator object for iterating over all alignments in the file
|
20
31
|
def alignments
|
21
|
-
Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format
|
32
|
+
Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format', 'json', @filename]
|
22
33
|
end
|
23
34
|
|
24
35
|
# True if index file was found
|
25
36
|
def has_index?
|
26
|
-
|
27
|
-
|
37
|
+
fn1 = @filename + '.bai'
|
38
|
+
fn2 = @filename.chomp(RubyFile.extname(@filename)) + '.bai'
|
39
|
+
RubyFile.exists?(fn1) || RubyFile.exists?(fn2)
|
28
40
|
end
|
29
41
|
|
30
42
|
# Fetches alignments overlapping a region.
|
@@ -35,11 +47,17 @@ module Bio
|
|
35
47
|
# * _chr_: reference sequence
|
36
48
|
# * _region_: a Range representing an interval. Coordinates are 1-based.
|
37
49
|
def fetch(chr, region)
|
38
|
-
Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format=json',
|
39
|
-
|
40
|
-
|
50
|
+
iter = Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format=json',
|
51
|
+
@filename]
|
52
|
+
iter.chromosome = chr
|
53
|
+
iter.region = region
|
54
|
+
iter
|
55
|
+
end
|
56
|
+
|
57
|
+
def [](chr)
|
58
|
+
fetch(chr, nil)
|
41
59
|
end
|
42
|
-
end
|
60
|
+
end # class File
|
43
61
|
|
44
|
-
end
|
45
|
-
end
|
62
|
+
end # module Bam
|
63
|
+
end # module Bio
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Bio
|
2
|
+
|
3
|
+
module SambambaStderrParser
|
4
|
+
private
|
5
|
+
def raise_exception_if_stderr_is_not_empty(perr)
|
6
|
+
msg = perr.read
|
7
|
+
raise msg unless msg.empty?
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
module FileExistenceChecker
|
12
|
+
def check_file_existence filename
|
13
|
+
raise "file #{filename} does not exist" unless File.exists? filename
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Bio
|
2
|
+
module Bam
|
3
|
+
|
4
|
+
def self.filter &block
|
5
|
+
AlignmentFilter.new &block
|
6
|
+
end
|
7
|
+
|
8
|
+
class AlignmentFilter
|
9
|
+
def to_s
|
10
|
+
@expression
|
11
|
+
end
|
12
|
+
|
13
|
+
def initialize &block
|
14
|
+
qb = QueryBuilder.new
|
15
|
+
qb.instance_eval &block
|
16
|
+
@expression = qb.expression
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
BINARY_OPS = [:>, :<, :>=, :<=, :==, :!=]
|
22
|
+
|
23
|
+
module ComparisonQueries
|
24
|
+
BINARY_OPS.each do |operator|
|
25
|
+
self.send :define_method, operator do |other|
|
26
|
+
if other.kind_of? String then
|
27
|
+
other = '\'' + other.gsub('\'', '\\\\\'') + '\''
|
28
|
+
end
|
29
|
+
@querybuilder.subexpressions << "#{@name} #{operator} #{other}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# see comment above for NumberQueries
|
35
|
+
module StringQueries
|
36
|
+
include ComparisonQueries
|
37
|
+
def =~ regex
|
38
|
+
raise 'operand must be Regexp' unless regex.kind_of? Regexp
|
39
|
+
@querybuilder.subexpressions << "#{@name} =~ #{regex.inspect}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
module FlagQueries
|
44
|
+
def is_set
|
45
|
+
@querybuilder.subexpressions << "#{@name}"
|
46
|
+
end
|
47
|
+
|
48
|
+
def is_unset
|
49
|
+
@querybuilder.subexpressions << "not #{@name}"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
class QueryBuilder
|
54
|
+
attr_accessor :subexpressions
|
55
|
+
|
56
|
+
def initialize
|
57
|
+
@subexpressions = []
|
58
|
+
class << @subexpressions
|
59
|
+
def pjoin str
|
60
|
+
self.map{|expr| '(' + expr + ')'}.join str
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def flag
|
66
|
+
FlagQueryBuilder.new(self)
|
67
|
+
end
|
68
|
+
|
69
|
+
def tag tagname
|
70
|
+
TagQueryBuilder.new(self, tagname)
|
71
|
+
end
|
72
|
+
|
73
|
+
@@default_value = { :ref_id => -1,
|
74
|
+
:mate_ref_id => -1,
|
75
|
+
:position => 0,
|
76
|
+
:mate_position => 0,
|
77
|
+
:template_length => 0,
|
78
|
+
:mapping_quality => 255
|
79
|
+
}
|
80
|
+
|
81
|
+
[:ref_id, :mate_ref_id,
|
82
|
+
:position, :mate_position,
|
83
|
+
:mapping_quality,
|
84
|
+
:sequence_length,
|
85
|
+
:template_length].each do |integer_field|
|
86
|
+
|
87
|
+
self.send :define_method, integer_field do
|
88
|
+
nb = NumberQueryBuilder.new(self, integer_field)
|
89
|
+
|
90
|
+
if not @@default_value[integer_field].nil?
|
91
|
+
class << nb
|
92
|
+
def is_unknown
|
93
|
+
expr = "#{@name} == #{@@default_value[@name]}"
|
94
|
+
@querybuilder.subexpressions << expr
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
nb
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
|
104
|
+
[:read_name, :sequence, :cigar].each do |string_field|
|
105
|
+
self.send :define_method, string_field do
|
106
|
+
StringQueryBuilder.new(self, string_field)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def negate &block
|
111
|
+
qb = QueryBuilder.new
|
112
|
+
qb.instance_eval &block
|
113
|
+
@subexpressions << ('not (' + qb.expression + ')')
|
114
|
+
end
|
115
|
+
|
116
|
+
def union &block
|
117
|
+
qb = QueryBuilder.new
|
118
|
+
qb.instance_eval &block
|
119
|
+
@subexpressions << (qb.subexpressions.pjoin ' or ')
|
120
|
+
nil
|
121
|
+
end
|
122
|
+
|
123
|
+
def intersection &block
|
124
|
+
qb = QueryBuilder.new
|
125
|
+
qb.instance_eval &block
|
126
|
+
@subexpressions << (qb.subexpressions.pjoin ' and ')
|
127
|
+
end
|
128
|
+
|
129
|
+
def expression
|
130
|
+
subexpressions.pjoin ' and '
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class FlagQueryBuilder
|
135
|
+
include FlagQueries
|
136
|
+
|
137
|
+
def initialize(querybuilder)
|
138
|
+
@querybuilder = querybuilder
|
139
|
+
end
|
140
|
+
|
141
|
+
[:unmapped, :mate_is_unmapped, :paired, :proper_pair,
|
142
|
+
:first_of_pair, :second_of_pair, :reverse_strand,
|
143
|
+
:mate_is_reverse_strand, :secondary_alignment,
|
144
|
+
:failed_quality_control, :duplicate].each do |flagname|
|
145
|
+
self.send :define_method, flagname do
|
146
|
+
@name = flagname
|
147
|
+
self
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
class TagQueryBuilder
|
153
|
+
include ComparisonQueries
|
154
|
+
include StringQueries
|
155
|
+
def initialize(querybuilder, tagname)
|
156
|
+
@querybuilder = querybuilder
|
157
|
+
@name = '[' + tagname.to_s + ']'
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
class NumberQueryBuilder
|
162
|
+
include ComparisonQueries
|
163
|
+
BINARY_OPS.each do |op|
|
164
|
+
self.send :define_method, op do |rhs|
|
165
|
+
if not rhs.kind_of? Integer then
|
166
|
+
raise "right-hand side must be an integer, not #{rhs.inspect}"
|
167
|
+
end
|
168
|
+
# 1-based -> 0-based
|
169
|
+
if @name == :position || @name == :mate_position then
|
170
|
+
rhs -= 1
|
171
|
+
end
|
172
|
+
super(rhs)
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def initialize(querybuilder, symbol)
|
177
|
+
@querybuilder = querybuilder
|
178
|
+
@name = symbol
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
class StringQueryBuilder
|
183
|
+
include StringQueries
|
184
|
+
|
185
|
+
BINARY_OPS.each do |op|
|
186
|
+
self.send :define_method, op do |rhs|
|
187
|
+
if not rhs.kind_of? String then
|
188
|
+
raise "right-hand side must be a string, not #{rhs.inspect}"
|
189
|
+
end
|
190
|
+
super(rhs)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def initialize(querybuilder, symbol)
|
195
|
+
@querybuilder = querybuilder
|
196
|
+
@name = symbol
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
end
|
data/lib/bio-sambamba/samfile.rb
CHANGED
@@ -4,10 +4,13 @@ module Bio
|
|
4
4
|
|
5
5
|
# Class providing access to SAM files
|
6
6
|
class File
|
7
|
+
|
8
|
+
include FileExistenceChecker
|
7
9
|
|
8
10
|
# Creates an object for access to SAM file
|
9
11
|
def initialize(filename)
|
10
12
|
@filename = filename
|
13
|
+
check_file_existence filename
|
11
14
|
end
|
12
15
|
|
13
16
|
# SAM header
|
@@ -17,7 +20,7 @@ module Bio
|
|
17
20
|
|
18
21
|
# Returns an AlignmentIterator object for iterating over all alignments in the file
|
19
22
|
def alignments
|
20
|
-
Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format
|
23
|
+
Bio::Bam::AlignmentIterator.new ['sambamba', 'view', '--format', 'json', '-S', @filename]
|
21
24
|
end
|
22
25
|
end
|
23
26
|
|
@@ -3,6 +3,7 @@ module Bio
|
|
3
3
|
|
4
4
|
# Represents SAM header
|
5
5
|
class SamHeader
|
6
|
+
include SambambaStderrParser
|
6
7
|
|
7
8
|
# Creates a new SamHeader object for a specified file,
|
8
9
|
# specifying additional options to pass to sambamba tool
|
@@ -14,9 +15,10 @@ module Bio
|
|
14
15
|
# Raw text of SAM header
|
15
16
|
def raw_contents
|
16
17
|
if @raw_contents.nil? then
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
cmd = ['sambamba', 'view', '-H', @filename] + @opts
|
19
|
+
Bio::Command.call_command_open3(cmd) do |pin, pout, perr|
|
20
|
+
@raw_contents = pout.read
|
21
|
+
raise_exception_if_stderr_is_not_empty(perr)
|
20
22
|
end
|
21
23
|
end
|
22
24
|
@raw_contents
|
@@ -55,9 +57,12 @@ module Bio
|
|
55
57
|
private
|
56
58
|
# Calls sambamba to get underlying JSON object
|
57
59
|
def get_json
|
58
|
-
|
59
|
-
line =
|
60
|
-
|
60
|
+
cmd = ['sambamba', 'view', '-H', '--format=json', @filename] + @opts
|
61
|
+
line = ''
|
62
|
+
Bio::Command.call_command_open3(cmd) do |pin, pout, perr|
|
63
|
+
line = pout.read
|
64
|
+
raise_exception_if_stderr_is_not_empty(perr)
|
65
|
+
end
|
61
66
|
@json = Oj.load(line)
|
62
67
|
end
|
63
68
|
end
|
metadata
CHANGED
@@ -1,121 +1,128 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-sambamba
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 1889055196096400351
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Artem Tarasov
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-08-15 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
15
23
|
name: bio
|
16
|
-
|
24
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
17
25
|
none: false
|
18
|
-
requirements:
|
26
|
+
requirements:
|
19
27
|
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3725784833518553922
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
- 2
|
21
34
|
version: 1.4.2
|
35
|
+
requirement: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
22
37
|
type: :runtime
|
23
38
|
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 1.4.2
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
39
|
name: oj
|
32
|
-
|
40
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
33
41
|
none: false
|
34
|
-
requirements:
|
42
|
+
requirements:
|
35
43
|
- - ~>
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
|
38
|
-
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 3635686038185777981
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 3
|
49
|
+
- 4
|
50
|
+
version: 1.3.4
|
51
|
+
requirement: *id002
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
type: :development
|
39
54
|
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: 1.2.9
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
55
|
name: bundler
|
48
|
-
|
56
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
49
57
|
none: false
|
50
|
-
requirements:
|
58
|
+
requirements:
|
51
59
|
- - ~>
|
52
|
-
- !ruby/object:Gem::Version
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 1287940794363108929
|
62
|
+
segments:
|
63
|
+
- 1
|
64
|
+
- 1
|
65
|
+
- 4
|
53
66
|
version: 1.1.4
|
67
|
+
requirement: *id003
|
68
|
+
- !ruby/object:Gem::Dependency
|
54
69
|
type: :development
|
55
70
|
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ~>
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 1.1.4
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
71
|
name: jeweler
|
64
|
-
|
72
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
65
73
|
none: false
|
66
|
-
requirements:
|
74
|
+
requirements:
|
67
75
|
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 4544990976642784663
|
78
|
+
segments:
|
79
|
+
- 1
|
80
|
+
- 8
|
81
|
+
- 3
|
69
82
|
version: 1.8.3
|
83
|
+
requirement: *id004
|
84
|
+
- !ruby/object:Gem::Dependency
|
70
85
|
type: :development
|
71
86
|
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ~>
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: 1.8.3
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
87
|
name: rspec
|
80
|
-
|
88
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
81
89
|
none: false
|
82
|
-
requirements:
|
90
|
+
requirements:
|
83
91
|
- - ~>
|
84
|
-
- !ruby/object:Gem::Version
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
hash: 3576421176010082098
|
94
|
+
segments:
|
95
|
+
- 2
|
96
|
+
- 7
|
97
|
+
- 0
|
85
98
|
version: 2.7.0
|
99
|
+
requirement: *id005
|
100
|
+
- !ruby/object:Gem::Dependency
|
86
101
|
type: :development
|
87
102
|
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ~>
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: 2.7.0
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
103
|
name: cucumber
|
96
|
-
|
104
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
97
105
|
none: false
|
98
|
-
requirements:
|
106
|
+
requirements:
|
99
107
|
- - ~>
|
100
|
-
- !ruby/object:Gem::Version
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
hash: 242951009964279202
|
110
|
+
segments:
|
111
|
+
- 1
|
112
|
+
- 2
|
113
|
+
- 0
|
101
114
|
version: 1.2.0
|
102
|
-
|
103
|
-
|
104
|
-
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - ~>
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: 1.2.0
|
110
|
-
description: New Sambamba library comes with a command-line tool for working with
|
111
|
-
SAM/BAM files. This gem brings some of its functionality to Ruby.
|
115
|
+
requirement: *id006
|
116
|
+
description: New Sambamba library comes with a command-line tool for working with SAM/BAM files. This gem brings some of its functionality to Ruby.
|
112
117
|
email: lomereiter@gmail.com
|
113
118
|
executables: []
|
119
|
+
|
114
120
|
extensions: []
|
115
|
-
|
121
|
+
|
122
|
+
extra_rdoc_files:
|
116
123
|
- LICENSE.txt
|
117
124
|
- README.md
|
118
|
-
files:
|
125
|
+
files:
|
119
126
|
- .document
|
120
127
|
- .travis.yml
|
121
128
|
- Gemfile
|
@@ -123,12 +130,15 @@ files:
|
|
123
130
|
- README.md
|
124
131
|
- Rakefile
|
125
132
|
- VERSION
|
133
|
+
- features/filtering.feature
|
126
134
|
- features/iterate-alignments.feature
|
127
135
|
- features/random-access.feature
|
128
136
|
- features/sam-header.feature
|
137
|
+
- features/step_definitions/filtering_steps.rb
|
129
138
|
- features/step_definitions/iterate-alignments_steps.rb
|
130
139
|
- features/step_definitions/random-access_steps.rb
|
131
140
|
- features/step_definitions/sam-header_steps.rb
|
141
|
+
- features/step_definitions/syntax-sugar_steps.rb
|
132
142
|
- features/step_definitions/validation-steps.rb
|
133
143
|
- features/support/env.rb
|
134
144
|
- features/syntax-sugar.feature
|
@@ -137,6 +147,9 @@ files:
|
|
137
147
|
- lib/bio-sambamba/alignment.rb
|
138
148
|
- lib/bio-sambamba/alignmentiterator.rb
|
139
149
|
- lib/bio-sambamba/bamfile.rb
|
150
|
+
- lib/bio-sambamba/basepair.rb
|
151
|
+
- lib/bio-sambamba/exception.rb
|
152
|
+
- lib/bio-sambamba/filtering.rb
|
140
153
|
- lib/bio-sambamba/samfile.rb
|
141
154
|
- lib/bio-sambamba/samheader.rb
|
142
155
|
- test/data/bins.bam
|
@@ -154,31 +167,37 @@ files:
|
|
154
167
|
- test/data/wrong_bc_subfield_length.bam
|
155
168
|
- test/data/wrong_extra_gzip_length.bam
|
156
169
|
homepage: http://github.com/lomereiter/bioruby-sambamba
|
157
|
-
licenses:
|
170
|
+
licenses:
|
158
171
|
- MIT
|
159
172
|
post_install_message:
|
160
173
|
rdoc_options: []
|
161
|
-
|
174
|
+
|
175
|
+
require_paths:
|
162
176
|
- lib
|
163
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
177
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
164
178
|
none: false
|
165
|
-
requirements:
|
166
|
-
- -
|
167
|
-
- !ruby/object:Gem::Version
|
168
|
-
|
169
|
-
segments:
|
179
|
+
requirements:
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
hash: 2002549777813010636
|
183
|
+
segments:
|
170
184
|
- 0
|
171
|
-
|
172
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
185
|
+
version: "0"
|
186
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
173
187
|
none: false
|
174
|
-
requirements:
|
175
|
-
- -
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
|
188
|
+
requirements:
|
189
|
+
- - ">="
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
hash: 2002549777813010636
|
192
|
+
segments:
|
193
|
+
- 0
|
194
|
+
version: "0"
|
178
195
|
requirements: []
|
196
|
+
|
179
197
|
rubyforge_project:
|
180
|
-
rubygems_version: 1.8.
|
198
|
+
rubygems_version: 1.8.12
|
181
199
|
signing_key:
|
182
200
|
specification_version: 3
|
183
201
|
summary: Ruby wrapper for Sambamba tool
|
184
202
|
test_files: []
|
203
|
+
|