bettersam 0.0.1.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bettersam.rb +135 -0
- metadata +47 -0
data/lib/bettersam.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# Represents a single alignment record (one non-header line) of a SAM
# (Sequence Alignment/Map) file and exposes its eleven mandatory fields,
# its optional tags, and convenience predicates for the bitwise FLAG field.
#
# Usage:
#   rec = BetterSam.new(line)   # parse immediately
#   rec = BetterSam.new         # or parse later with rec.parse_line(line)
class BetterSam

  # Bit masks of the SAM flag components. The array is padded with a leading
  # nil so that index i is one more than the exponent 2 must be raised to in
  # order to get the mask (i.e. value = 2^(i-1)).
  #
  # Kept as a global variable so code that already reads $flags keeps working.
  $flags = [
    nil,
    0x1,    # 1. read paired
    0x2,    # 2. read mapped in proper pair (i.e. with acceptable insert size)
    0x4,    # 3. read unmapped
    0x8,    # 4. mate unmapped
    0x10,   # 5. read reverse strand
    0x20,   # 6. mate reverse strand
    0x40,   # 7. first in pair
    0x80,   # 8. second in pair
    0x100,  # 9. not primary alignment
    0x200,  # 10. read fails platform/vendor quality checks
    0x400   # 11. read is PCR or optical duplicate
  ]

  # The eleven mandatory SAM columns (in file order) followed by the hash of
  # optional tags.
  attr_accessor :name, :flag, :chrom, :pos, :mapq, :cigar, :mchrom, :mpos,
                :insert, :seq, :qual, :tags

  # Creates a record, parsing +line+ straight away when one is given.
  #
  # line - a single tab-separated SAM line, or nil for an empty record.
  def initialize(line = nil)
    @tags = {}
    parse_line(line) unless line.nil?
  end

  # Populates this record from one line of a SAM file.
  #
  # line - a tab-separated SAM line (a trailing newline is tolerated).
  #
  # Returns false, leaving the record untouched, when the line is a header
  # line (starts with "@"); returns true once an alignment line is parsed.
  #
  # Raises RuntimeError when the line has fewer than 11 fields or when an
  # optional field contains no ':' separator.
  def parse_line(line)
    return false if line.start_with?("@")

    f = line.chomp.split("\t", -1)
    raise "SAM lines must have at least 11 fields (had #{f.size})" if f.size < 11

    # columns: 1:name 2:flag 3:chr 4:pos 5:mapq 6:cigar 7:mchr 8:mpos
    #          9:insrt 10:seq 11:qual
    @name   = f[0]
    @flag   = int_or_raw(f[1])
    @chrom  = f[2]
    @pos    = int_or_neg1(f[3])
    @mapq   = int_or_neg1(f[4])
    @cigar  = f[5]
    @mchrom = f[6]
    @mpos   = int_or_neg1(f[7])
    @insert = int_or_raw(f[8])
    @seq    = f[9]
    @qual   = f[10]

    # Everything after the 11th column is an optional tag. The key is the
    # text before the LAST colon (so it includes the type code, e.g. "NM:i")
    # and the value is the text after it, converted to an Integer if possible.
    # NOTE(review): a value that itself contains ':' is split at its last
    # colon as well - confirm this is acceptable for the tags in use.
    @tags = {}
    f.drop(11).each do |tag|
      colon_index = tag.rindex(':')
      if colon_index.nil?
        raise "malformed SAM tag '#{tag}' (no ':' separator) in line: #{line}"
      end

      key = tag[0, colon_index]
      value = int_or_raw(tag[(colon_index + 1)..-1] || "")
      @tags[key] = value
    end

    true
  end

  # flag parsing convenience methods
  # Each predicate tests one bit of the integer flag against $flags.

  # True when the "read paired" bit (0x1) is set.
  def read_paired?
    (@flag & $flags[1]) != 0
  end

  # True when the "read mapped in proper pair" bit (0x2) is set.
  def read_properly_paired?
    (@flag & $flags[2]) != 0
  end

  # True when the "read unmapped" bit (0x4) is set.
  def read_unmapped?
    (@flag & $flags[3]) != 0
  end

  # True when the "mate unmapped" bit (0x8) is set.
  def mate_unmapped?
    (@flag & $flags[4]) != 0
  end

  # True when the "read reverse strand" bit (0x10) is set.
  def read_reverse_strand?
    (@flag & $flags[5]) != 0
  end

  # True when the "mate reverse strand" bit (0x20) is set.
  def mate_reverse_strand?
    (@flag & $flags[6]) != 0
  end

  # True when the "first in pair" bit (0x40) is set.
  def first_in_pair?
    (@flag & $flags[7]) != 0
  end

  # True when the "second in pair" bit (0x80) is set.
  def second_in_pair?
    (@flag & $flags[8]) != 0
  end

  # True when the "not primary alignment" bit (0x100) is clear.
  def primary_aln?
    (@flag & $flags[9]) == 0
  end

  # True when the "read fails platform/vendor quality checks" bit (0x200)
  # is set.
  def quality_fail?
    (@flag & $flags[10]) != 0
  end

  # True when the "read is PCR or optical duplicate" bit (0x400) is set.
  def pcr_duplicate?
    (@flag & $flags[11]) != 0
  end

  # pair convenience methods

  # True only when neither the read nor its mate is flagged as unmapped.
  def both_mapped?
    !read_unmapped? && !mate_unmapped?
  end

  # True when exactly one of read and mate is flagged as reverse strand.
  def pair_opposite_strands?
    read_reverse_strand? != mate_reverse_strand?
  end

  # True when read and mate carry the same reverse-strand flag state.
  def pair_same_strand?
    !pair_opposite_strands?
  end

  private

  # Converts x with Kernel#Integer, returning -1 when it is not an integer
  # literal (for example the "*" placeholder). Kernel#Integer also accepts
  # prefixed forms such as "0x1F".
  def int_or_neg1(x)
    Integer(x)
  rescue ArgumentError, TypeError
    -1
  end

  # Converts x with Kernel#Integer, returning x itself unchanged when it is
  # not an integer literal.
  def int_or_raw(x)
    Integer(x)
  rescue ArgumentError, TypeError
    x
  end

end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bettersam
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Richard Smith
|
9
|
+
- Jesse Rodriguez
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2013-09-17 00:00:00.000000000 Z
|
14
|
+
dependencies: []
|
15
|
+
description: Extended SAM (Sequence Alignment/Map) file parsing
|
16
|
+
email: rds45@cam.ac.uk
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/bettersam.rb
|
22
|
+
homepage: https://github.com/blahah/bettersam
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
none: false
|
37
|
+
requirements:
|
38
|
+
- - ! '>'
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.3.1
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 1.8.24
|
44
|
+
signing_key:
|
45
|
+
specification_version: 3
|
46
|
+
summary: Extended SAM file parsing
|
47
|
+
test_files: []
|