bettersam 0.0.1.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bettersam.rb +135 -0
- metadata +47 -0
data/lib/bettersam.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# Parser for a single alignment record (one tab-separated line) of a SAM
# (Sequence Alignment/Map) file.
#
# The eleven mandatory columns are exposed through accessors; optional
# columns are collected in the +tags+ hash. Predicate methods decode the
# individual bits of the FLAG column.
class BetterSam

  # Bit masks of the SAM flag components, with index i being one more
  # than the exponent 2 must be raised to to get the value
  # (i.e. value = 2^(i-1)). Index 0 is a nil placeholder so that the
  # numbering starts at 1.
  $flags = [
    nil,
    0x1,   # 1. read paired
    0x2,   # 2. read mapped in proper pair (i.e. with acceptable insert size)
    0x4,   # 3. read unmapped
    0x8,   # 4. mate unmapped
    0x10,  # 5. read reverse strand
    0x20,  # 6. mate reverse strand
    0x40,  # 7. first in pair
    0x80,  # 8. second in pair
    0x100, # 9. not primary alignment
    0x200, # 10. read fails platform/vendor quality checks
    0x400  # 11. read is PCR or optical duplicate
  ]

  attr_accessor :name, :flag, :chrom, :pos, :mapq, :cigar, :mchrom, :mpos, :insert, :seq, :qual, :tags

  # Creates a record. When +line+ is given it is parsed immediately via
  # parse_line; otherwise every field except +tags+ (an empty hash) is
  # left unset.
  def initialize(line=nil)
    @tags = {}
    parse_line(line) unless line.nil?
  end

  # Parses one SAM line into this object's fields.
  #
  # Returns false (leaving the object untouched) when +line+ starts with
  # "@", and true once an alignment line has been parsed.
  #
  # Raises RuntimeError when the line has fewer than 11 tab-separated
  # fields or when an optional field contains no ':' separator.
  def parse_line(line)
    return false if line[0] == "@"

    # limit of -1 keeps trailing empty columns so the field count is honest
    f = line.chomp.split("\t", -1)
    raise "SAM lines must have at least 11 fields (had #{f.size})" if f.size < 11

    # columns: 1:name 2:flag 3:chr 4:pos 5:mapq 6:cigar 7:mchr 8:mpos 9:insrt 10:seq 11:qual
    @name   = f[0]
    @flag   = int_or_raw(f[1])
    @chrom  = f[2]
    @pos    = int_or_neg1(f[3])
    @mapq   = int_or_neg1(f[4])
    @cigar  = f[5]
    @mchrom = f[6]
    @mpos   = int_or_neg1(f[7])
    @insert = int_or_raw(f[8])
    @seq    = f[9]
    @qual   = f[10]

    # everything after the 11 mandatory columns is an optional field
    @tags = {}
    f.drop(11).each do |tag|
      key, value = parse_tag(tag, line)
      @tags[key] = value
    end

    true
  end

  # flag parsing convenience methods

  # True when the "read paired" bit (0x1) is set.
  def read_paired?
    flag_set?(1)
  end

  # True when the "read mapped in proper pair" bit (0x2) is set.
  def read_properly_paired?
    flag_set?(2)
  end

  # True when the "read unmapped" bit (0x4) is set.
  def read_unmapped?
    flag_set?(3)
  end

  # True when the "mate unmapped" bit (0x8) is set.
  def mate_unmapped?
    flag_set?(4)
  end

  # True when the "read reverse strand" bit (0x10) is set.
  def read_reverse_strand?
    flag_set?(5)
  end

  # True when the "mate reverse strand" bit (0x20) is set.
  def mate_reverse_strand?
    flag_set?(6)
  end

  # True when the "first in pair" bit (0x40) is set.
  def first_in_pair?
    flag_set?(7)
  end

  # True when the "second in pair" bit (0x80) is set.
  def second_in_pair?
    flag_set?(8)
  end

  # True when the "not primary alignment" bit (0x100) is clear.
  def primary_aln?
    !flag_set?(9)
  end

  # True when the "read fails platform/vendor quality checks" bit (0x200)
  # is set.
  def quality_fail?
    flag_set?(10)
  end

  # True when the "read is PCR or optical duplicate" bit (0x400) is set.
  def pcr_duplicate?
    flag_set?(11)
  end

  # pair convenience methods

  # True only when neither the read nor its mate is flagged as unmapped.
  def both_mapped?
    !read_unmapped? && !mate_unmapped?
  end

  # True when exactly one of read and mate is on the reverse strand.
  def pair_opposite_strands?
    read_reverse_strand? != mate_reverse_strand?
  end

  # True when read and mate are on the same strand.
  def pair_same_strand?
    !pair_opposite_strands?
  end

  private

  # True when the flag component at +index+ of $flags is set in @flag.
  def flag_set?(index)
    (@flag & $flags[index]) != 0
  end

  # Splits one optional field into [key, value].
  #
  # The split happens at the second ':' so that "NM:i:2" gives the key
  # "NM:i" and a value that may itself contain colons stays intact
  # ("RG:Z:lib:a" => "RG:Z", "lib:a"). A field with a single ':' is split
  # there. The value is converted with int_or_raw.
  #
  # Raises RuntimeError when +tag+ contains no ':' at all; +line+ is only
  # used to make that error message traceable.
  def parse_tag(tag, line)
    first_colon = tag.index(':')
    if first_colon.nil?
      raise "SAM optional field #{tag.inspect} has no ':' separator in line: #{line}"
    end

    split_at = tag.index(':', first_colon + 1) || first_colon
    [tag[0, split_at], int_or_raw(tag[(split_at + 1)..-1] || "")]
  end

  # Integer value of +x+, or -1 when +x+ cannot be converted by Integer().
  def int_or_neg1(x)
    Integer(x)
  rescue ArgumentError, TypeError
    -1
  end

  # Integer value of +x+, or +x+ itself when it cannot be converted by
  # Integer().
  def int_or_raw(x)
    Integer(x)
  rescue ArgumentError, TypeError
    x
  end

end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bettersam
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Richard Smith
|
9
|
+
- Jesse Rodriguez
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2013-09-17 00:00:00.000000000 Z
|
14
|
+
dependencies: []
|
15
|
+
description: Extended SAM (Sequence Alignment/Map) file parsing
|
16
|
+
email: rds45@cam.ac.uk
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/bettersam.rb
|
22
|
+
homepage: https://github.com/blahah/bettersam
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ! '>'
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.3.1
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 1.8.24
|
44
|
+
signing_key:
|
45
|
+
specification_version: 3
|
46
|
+
summary: Extended SAM file parsing
|
47
|
+
test_files: []
|