bettersam 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: adf55f7adc240db5cee1c5f188f3abd5798fbb98
4
- data.tar.gz: 49f7b0e878392c32e49f18420777987fdcb252fc
3
+ metadata.gz: ebf104198d7b3ce4721ee95da8e6364049652351
4
+ data.tar.gz: 65fc1835e6563e44febdf76f9cc1feb7cd0c16b0
5
5
  SHA512:
6
- metadata.gz: 74cbfbabd15fca7d5e9d3568b2f2fcb7ddb363f2af9df94db793f877a98565272db38a60000bef79b7b4324a2850afefc111d063148eccefa822ddb4cb6905b3
7
- data.tar.gz: b73440febce482e72fecc812e0afde3478743e128a9bf43f1f6842cd1e28fff1b70786b8fae13a9f60c46803b7ab4ce29e3e844d5d2880e82cab8c23f4407dc3
6
+ metadata.gz: 0fc83632508d5254d030ff3e960db2a6c162fa6297fa0468888d32f377b4e60c003c3d08ef700eb1fda1cf92fa6fa51e5a05f8435ba154728c1e6f71f768e125
7
+ data.tar.gz: c6bfd6f772918cdfb08efaac2af693d23c8938924862f5aa68a87077183576a9d4c4c950c8a018f964b8f461bd0eb8248840f72ec49a69c6fb91fa594117c341
data/Rakefile CHANGED
@@ -5,5 +5,15 @@ Rake::TestTask.new do |t|
5
5
  end
6
6
 
7
7
  desc "Run tests"
8
- task :default => :test
8
+ task :default => ["ext:build", :test]
9
9
 
10
namespace :ext do
  desc "Build native extension"
  task :build do
    # The block form of cd restores the previous working directory when the
    # block exits, including when one of the build steps raises. The original
    # cd "ext" ... cd ".." pair left the process inside ext/ on failure.
    cd "ext" do
      ruby "mkrf_conf.rb"
      sh "rake"
    end
  end
end
data/lib/bettersam.rb CHANGED
@@ -1,270 +1,34 @@
1
- class BetterSam
2
-
3
- # meanings of SAM flag components, with index i
4
- # being one more than the exponent 2 must be raised to to get the
5
- # value (i.e. value = 2^(i+1))
6
- $flags = [
7
- nil,
8
- 0x1, # 1. read paired
9
- 0x2, # 2. read mapped in proper pair (i.e. with acceptable insert size)
10
- 0x4, # 3. read unmapped
11
- 0x8, # 4. mate unmapped
12
- 0x10, # 5. read reverse strand
13
- 0x20, # 6. mate reverse strand
14
- 0x40, # 7. first in pair
15
- 0x80, # 8. second in pair
16
- 0x100, # 9. not primary alignment
17
- 0x200, # 10. read fails platform/vendor quality checks
18
- 0x400] # 11. read is PCR or optical duplicate
19
-
20
- public
21
- attr_accessor :name, :flag, :chrom, :pos, :mapq, :cigar, :mchrom, :mpos
22
- attr_accessor :insert, :seq, :qual, :tags, :length
23
- attr_accessor :snp
24
- attr_reader :cigar_list
25
-
26
- def initialize(line=nil)
27
- @tags = {}
28
- parse_line(line) unless line.nil?
29
- end
30
-
31
- def parse_line(line)
32
- return false if line[0] == "@"
33
-
34
- f = line.chomp.split("\t", -1)
35
- raise "SAM lines must have at least 11 fields (had #{f.size})" if f.size < 11
36
-
37
- # colnames = %w(1:name 2:flag 3:chr 4:pos 5:mapq 6:cigar 7:mchr 8:mpos 9:insrt 10:seq 11:qual)
38
-
39
- @name = f[0]
40
- @flag = int_or_raw(f[1])
41
- @chrom = f[2]
42
- @pos = int_or_neg1(f[3])
43
- @mapq = int_or_neg1(f[4])
44
- @cigar = f[5]
45
- @mchrom = f[6]
46
- @mpos = int_or_neg1(f[7])
47
- @insert = int_or_raw(f[8])
48
- @seq = f[9]
49
- @qual = f[10]
50
- @length = nil
51
-
52
- @tags = {}
53
- i = 11
54
- while i < f.size
55
- tag = f[i]
56
- i += 1
57
- a = tag.split(":")
58
- raise line if a.length != 3
59
- if a[1]=="i"
60
- @tags[a[0].to_sym] = a[2].to_i
61
- elsif a[1]=="Z"
62
- @tags[a[0].to_sym] = a[2]
63
- else
64
- @tags[a[0].to_sym] = a[2]
65
- end
66
- end
67
-
68
- return true;
69
- end
70
-
71
- # flag parsing convenience methods
72
-
73
- def read_paired?
74
- @flag & $flags[1] != 0
75
- end
76
-
77
- def read_properly_paired?
78
- @flag & $flags[2] != 0
79
- end
80
-
81
- def read_unmapped?
82
- @flag & $flags[3] != 0
83
- end
84
-
85
- def mate_unmapped?
86
- @flag & $flags[4] != 0
87
- end
88
-
89
- def read_reverse_strand?
90
- @flag & $flags[5] != 0
91
- end
92
-
93
- def mate_reverse_strand?
94
- @flag & $flags[6] != 0
95
- end
96
-
97
- def first_in_pair?
98
- @flag & $flags[7] != 0
99
- end
100
-
101
- def second_in_pair?
102
- @flag & $flags[8] !=0
103
- end
104
-
105
- def primary_aln?
106
- (@flag & $flags[9]) == 0
107
- end
108
-
109
- def quality_fail?
110
- @flag & $flags[10] != 0
111
- end
1
+ require 'ffi'
2
+ require 'bettersam/library'
3
+ require 'bettersam/samtagset'
4
+ require 'bettersam/samrecord'
112
5
 
113
- def pcr_duplicate?
114
- @flag & $flags[11] != 0
115
- end
116
-
117
- # pair convenience methods
118
-
119
- def both_mapped?
120
- !(self.read_unmapped? && self.mate_unmapped?)
121
- end
122
-
123
- def pair_opposite_strands?
124
- (!self.read_reverse_strand? && self.mate_reverse_strand?) ||
125
- (self.read_reverse_strand? && !self.mate_reverse_strand?)
126
- end
127
-
128
- def pair_same_strand?
129
- !self.pair_opposite_strands?
130
- end
131
-
132
- def edit_distance
133
- @tags[:NM]
134
- end
135
-
136
- def length
137
- @length = @seq.length if !@length
138
- return @length
139
- end
140
-
141
- # cigar parsing methods
142
-
143
- def exact_match?
144
- @tags[:NM]==0
145
- end
146
-
147
- def endpos
148
- if !@cigar_list
149
- self.parse_cigar
150
- end
151
- e = @pos
152
- @cigar_list.each do |h|
153
- a = h.to_a
154
- bases = a[0][0]
155
- match = a[0][1]
156
- if match =~ /[MD]/
157
- e += bases
158
- end
159
- end
160
- return e
161
- end
162
-
163
- def parse_cigar
164
- str = @cigar
165
- l = str.length
166
- @cigar_list = []
167
- while str.length>0
168
- if str =~ /([0-9]+[MIDNSHPX=]+)/
169
- @cigar_list << {$1[0..-2].to_i => $1[-1]}
170
- str = str.slice($1.length, l)
171
- else
172
- puts str
173
- end
174
- end
175
- end
6
+ class BetterSam
176
7
 
177
- # snp storing
8
+ extend FFI::Library
178
9
 
179
- def contains_snp?(snp)
180
- snp >= @pos and snp < self.endpos
181
- end
10
+ ffi_lib Library.load
11
+ attach_function :sam_iterator, [SAMRecord], :int
12
+ attr_accessor :file
182
13
 
183
- def mark_snp(snp)
184
- if self.contains_snp?(snp)
185
- if !@cigar_list
186
- self.parse_cigar
187
- end
188
- p = @pos
189
- s = snp
190
- @cigar_list.each do |h|
191
- if p > s and s >= @pos
192
- @snp = s - @pos
193
- else
194
- a = h.to_a
195
- bases = a[0][0]
196
- match = a[0][1]
197
- if match == "M"
198
- p += bases
199
- elsif match == "I"
200
- s += bases
201
- elsif match == "D"
202
- s -= bases
203
- end
204
- end
205
- end
206
- if p > s and s >= @pos
207
- @snp = s - @pos
208
- end
209
- end
210
- @snp
14
# Creates a reader bound to one SAM file.
#
# The path is only stored here; whether it exists is checked later,
# when #each_record is called.
#
# @param file [String] path of the SAM file to read
def initialize(file)
  self.file = file
end
212
17
 
213
- def transfer_snp(bs) # load in another bettersam object
214
- if !self.read_unmapped? and !bs.read_unmapped?
215
- if (self.read_reverse_strand? and bs.read_reverse_strand?) or (!self.read_reverse_strand? and !bs.read_reverse_strand?)
216
- @snp = bs.snp
217
- else
218
- end
18
# Iterates over the alignment records of the SAM file at +file+.
#
# A single SAMRecord is allocated, given the file name, and handed to
# #parse_sam, which yields it once per record. The SAME object is yielded
# every time, so copy out any values you need to keep instead of storing
# the record itself.
#
# @yieldparam record [SAMRecord] the record currently being read
# @return [Enumerator] when called without a block (the enumerator yields
#   the same reused record object on every step)
# @return [Object] otherwise, whatever #parse_sam returns
# @raise [ArgumentError] if the file does not exist
def each_record(&block)
  # Without a block the original only failed once parse_sam reached its
  # yield, after the native iterator had already been started.
  return enum_for(:each_record) unless block_given?

  unless File.exist?(file)
    raise ArgumentError, "File #{file} does not exist"
  end

  record = SAMRecord.new
  record[:filename] = FFI::MemoryPointer.from_string(file)
  parse_sam(record, &block)
end
221
27
 
222
- def put_snp # find the location of a snp on the genome
223
- if @snp
224
- if !@cigar_list
225
- self.parse_cigar
226
- end
227
- s = @snp
228
- p = 0
229
- @cigar_list.each do |h|
230
- if p > s
231
- return s+@pos
232
- else
233
- a = h.to_a
234
- bases = a[0][0]
235
- match = a[0][1]
236
- if match=="M"
237
- p += bases
238
- elsif match=="D"
239
- s += bases
240
- elsif match=="I"
241
- s -= bases
242
- end
243
- end
244
- end
245
- if p > s
246
- return s+@pos
247
- end
248
- else
249
- puts "need to run mark_snp and transfer_snp first"
250
- return nil
28
# Drives the native iterator over +record+.
#
# BetterSam.sam_iterator is called repeatedly with the same record; each
# time it returns 1 the record is yielded to the caller's block. Any other
# return value ends the loop.
#
# @param record [SAMRecord] record passed to the native iterator on each call
# @yieldparam record [SAMRecord] the same object, once per successful call
# @return [nil]
def parse_sam(record, &block)
  yield record while BetterSam.sam_iterator(record) == 1
end
254
33
 
255
- def get_base_at(p)
256
- @seq[p]
257
- end
258
-
259
- private
260
-
261
- def int_or_neg1(x)
262
- Integer(x) rescue -1
263
- end
264
-
265
- def int_or_raw(x)
266
- Integer(x) rescue x
267
- end
268
-
269
-
270
34
  end
@@ -0,0 +1,25 @@
1
require 'rbconfig'

class BetterSam
  # Works out where the compiled native library lives so that it can be
  # handed to ffi_lib.
  class Library

    # File extension used for shared libraries on the host OS.
    #
    # @return [String] 'so' on Linux, 'dylib' on macOS (darwin)
    # @raise [NotImplementedError] on any other host OS
    def self.lib_extension
      host_os = RbConfig::CONFIG['host_os']
      case host_os
      when /linux/ then 'so'
      when /darwin/ then 'dylib'
      else
        # Name the actual platform: the original message blamed Windows for
        # every unsupported OS.
        raise NotImplementedError,
              "Native library is not available for this platform (#{host_os})"
      end
    end

    # Builds the path of the shared library for the OS in use.
    #
    # The library is expected in the +ext+ directory that sits next to
    # +lib+, i.e. two levels above +lib_dir+. The path is derived by walking
    # up the directory tree; the original substituted "lib/bettersam" in the
    # whole absolute path, which also rewrote any parent directory that
    # contained that text.
    #
    # @param lib_dir [String] directory containing this file (lib/bettersam)
    # @return [String] absolute path of the shared library file
    # @raise [NotImplementedError] if the host OS is unsupported
    # @note BetterSam calls this while its class body is evaluated
    def self.load(lib_dir = File.dirname(__FILE__))
      ext_dir = File.expand_path(File.join('..', '..', 'ext'), lib_dir)
      File.join(ext_dir, "libbettersam.#{lib_extension}")
    end

  end
end
@@ -0,0 +1,225 @@
1
class BetterSam

  # One alignment record of a SAM file, laid out as the native struct that
  # BetterSam.sam_iterator receives.
  #
  # BetterSam#each_record passes the same instance to the native iterator
  # for every record and yields that instance each time. Values derived from
  # the struct are therefore recomputed on demand here rather than cached
  # across records.
  class SAMRecord < FFI::Struct

    # Bit masks of the SAM FLAG field. Index i (1-based) holds the mask of
    # the i-th flag, i.e. FLAGS[i] == 2**(i - 1); index 0 is unused.
    FLAGS = [
      nil,
      0x1,    # 1. read paired
      0x2,    # 2. read mapped in proper pair (i.e. with acceptable insert size)
      0x4,    # 3. read unmapped
      0x8,    # 4. mate unmapped
      0x10,   # 5. read reverse strand
      0x20,   # 6. mate reverse strand
      0x40,   # 7. first in pair
      0x80,   # 8. second in pair
      0x100,  # 9. not primary alignment
      0x200,  # 10. read fails platform/vendor quality checks
      0x400   # 11. read is PCR or optical duplicate
    ].freeze

    # The global is still assigned so that code reading $flags keeps
    # working; the methods below use FLAGS and are unaffected if the global
    # is reassigned.
    $flags = FLAGS.dup

    # CIGAR operations that consume reference bases (SAM specification).
    REFERENCE_OPS = %w[M D N = X].freeze

    # A well-formed CIGAR string: one or more <count><operation> pairs.
    CIGAR_PATTERN = /\A(?:\d+[MIDNSHPX=])+\z/

    # NOTE: fields must be in the same order
    # here as they are in the struct definition
    # in bettersam.h
    layout :qname, :pointer,
           :flag, :int,
           :rname, :pointer,
           :pos, :int,
           :mapq, :int,
           :cigar, :pointer,
           :rnext, :pointer,
           :pnext, :int,
           :tlen, :int,
           :seq, :pointer,
           :qual, :pointer,
           :tags, :pointer,
           :filename, :pointer,
           :line, :pointer,
           :file, :pointer

    # Plain Ruby attributes; they are not read from the native struct.
    attr_accessor :insert, :snp
    # An explicitly assigned length overrides the sequence length (see #length).
    attr_writer :length
    # Result of the most recent #parse_cigar call, or nil if never parsed.
    attr_reader :cigar_list

    # @return [String] query (read) name
    def qname
      self[:qname].read_string
    end

    # Alias of #qname.
    def name
      qname
    end

    # @return [Integer] bitwise SAM FLAG value
    def flag
      self[:flag]
    end

    # @return [String] reference sequence name
    def rname
      self[:rname].read_string
    end

    # Alias of #rname (spelling kept for existing callers).
    def chrome
      rname
    end

    # Alias of #rname under the reader name used in 0.1.1.
    def chrom
      rname
    end

    # @return [Integer] mapping position
    def pos
      self[:pos]
    end

    # @return [Integer] mapping quality
    def mapq
      self[:mapq]
    end

    # @return [String] CIGAR string
    def cigar
      self[:cigar].read_string
    end

    # @return [String] reference name of the mate / next read
    def rnext
      self[:rnext].read_string
    end

    # Alias of #rnext.
    def mchrom
      rnext
    end

    # @return [Integer] position of the mate / next read
    def pnext
      self[:pnext]
    end

    # Alias of #pnext.
    def mpos
      pnext
    end

    # @return [Integer] template length
    def tlen
      self[:tlen]
    end

    # @return [String] read sequence
    def seq
      self[:seq].read_string
    end

    # @return [String] base qualities
    def qual
      self[:qual].read_string
    end

    # Wraps the struct's current tags pointer. A new wrapper is built on
    # every call so that it cannot keep pointing at an earlier record's tags.
    #
    # @return [BetterSam::SAMTagSet]
    def tags
      BetterSam::SAMTagSet.new(self[:tags])
    end

    # basic flag convenience methods

    def read_paired?
      flag_set?(1)
    end

    def read_properly_paired?
      flag_set?(2)
    end

    def read_unmapped?
      flag_set?(3)
    end

    def mate_unmapped?
      flag_set?(4)
    end

    def read_reverse_strand?
      flag_set?(5)
    end

    def mate_reverse_strand?
      flag_set?(6)
    end

    def first_in_pair?
      flag_set?(7)
    end

    def second_in_pair?
      flag_set?(8)
    end

    # True when the "not primary alignment" bit is clear.
    def primary_aln?
      !flag_set?(9)
    end

    def quality_fail?
      flag_set?(10)
    end

    def pcr_duplicate?
      flag_set?(11)
    end

    # pair convenience methods

    # True only when neither the read nor its mate is flagged unmapped.
    # (The original negated the conjunction, which is also true when just
    # one of the two is mapped.)
    def both_mapped?
      !(read_unmapped? || mate_unmapped?)
    end

    # True when exactly one of read and mate is on the reverse strand.
    def pair_opposite_strands?
      read_reverse_strand? != mate_reverse_strand?
    end

    def pair_same_strand?
      !pair_opposite_strands?
    end

    # @return [Integer] the nm value of the tag set
    def edit_distance
      tags.nm
    end

    # Length of the read: the explicitly assigned value if there is one,
    # otherwise the length of the current sequence. The sequence length is
    # not cached, so it follows the struct when it is refilled.
    #
    # @return [Integer]
    def length
      @length || seq.length
    end

    # cigar parsing methods

    # True when the edit distance is zero and the CIGAR is a single match
    # block spanning the whole sequence.
    def exact_match?
      tags.nm == 0 && cigar == "#{seq.length}M"
    end

    # Position just past the last reference base covered by the alignment:
    # pos plus the lengths of all reference-consuming CIGAR operations.
    # The CIGAR is re-parsed whenever it differs from the one parsed last.
    #
    # @return [Integer]
    # @raise [ArgumentError] if the CIGAR string is malformed
    def endpos
      parse_cigar unless @cigar_list && @parsed_cigar == cigar
      @cigar_list.inject(pos) do |e, h|
        bases, op = h.first
        REFERENCE_OPS.include?(op) ? e + bases : e
      end
    end

    # Splits the current CIGAR string into a list of single-pair hashes
    # ({count => operation}) stored in #cigar_list. An empty string or "*"
    # gives an empty list.
    #
    # The original loop never terminated when part of the string could not
    # be matched; malformed input now raises instead.
    #
    # @return [Array<Hash{Integer => String}>] the parsed list
    # @raise [ArgumentError] if the CIGAR string is malformed
    def parse_cigar
      str = cigar
      unless str.empty? || str == '*' || str =~ CIGAR_PATTERN
        raise ArgumentError, "Invalid CIGAR string: #{str.inspect}"
      end
      @parsed_cigar = str
      @cigar_list = str.scan(/(\d+)([MIDNSHPX=])/).map do |bases, op|
        { bases.to_i => op }
      end
    end

    # @param p [Integer] zero-based index into the read sequence
    # @return [String, nil] the base at that index
    def get_base_at(p)
      seq[p]
    end

    private

    # Tests the index-th (1-based) bit of the FLAG field.
    def flag_set?(index)
      flag & FLAGS[index] != 0
    end

  end

end
@@ -0,0 +1,23 @@
1
class BetterSam

  # The two integer tag values that the native parser exposes per record.
  class SAMTagSet < FFI::Struct

    # NOTE: keep the field order identical to the struct
    # definition in bettersam.h
    layout(
      :xm, :int,
      :nm, :int
    )

    # @return [Integer] number of mismatches in the alignment
    def xm
      self[:xm]
    end

    # @return [Integer] edit distance between query and target
    def nm
      self[:nm]
    end

  end

end
@@ -0,0 +1,6 @@
1
+ FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 nivara_3s 1572276 40 100M = 1571527 -849 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-24 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:0 MD:Z:1T1G3T0A91 YS:i:-5 YT:Z:DP
2
+ FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 chromosome03 1789384 24 4M5I91M = 1788782 -697 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-38 XN:i:0 XM:i:3 XO:i:1 XG:i:5 NM:i:8 MD:Z:0C1T6G85 YS:i:-5 YT:Z:DP
3
+ FCC00CKABXX:2:1101:19524:66398#CAGATCAT 145 chromosome03 1789377 23 4M1I2M1D93M = 1788766 -711 GGAGGATCGGGCCTCGTGGGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGA Bc`aaT\Y_]RLMKKMHEMV_T[Y[deaeeeaadbaaa\_feecedddddadfcegdcXdggcggggggggg`gfbecbcggggggggeggggggggggg AS:i:-51 XN:i:0 XM:i:6 XO:i:2 XG:i:2 NM:i:8 MD:Z:2T0C2^A2T0A5G1A81 YS:i:0 YT:Z:DP
4
+ FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 nivara_3s 1572267 23 5M2D3M2I3M1I86M = 1571498 -868 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-53 XN:i:0 XM:i:4 XO:i:3 XG:i:5 NM:i:9 MD:Z:2G2^TG3T5T0A81 YS:i:0 YT:Z:DP
5
+ FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 chromosome03 1789378 23 7M4I3M5I81M = 1788753 -716 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-59 XN:i:0 XM:i:4 XO:i:2 XG:i:9 NM:i:13 MD:Z:3A1A2T6G75 YS:i:0 YT:Z:DP
6
+ FCC2HFRACXX:7:2314:9299:67450#TGACCAAT 355 Sb02g000720.1 1186 18 71M = 1238 -150 CGTCATCTTCTCTCATATATTTGTATCACCCATCCATCCATCTGCCTTCGATATGCATCTCCACTCCGCCG __^cc]^\`eegea`ffdfghhfd]eghhfffef``degfhf_^gdfhfg_fghhhfdhffdfhffbeWcW AS:i:142 XN:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:71 YS:i:44 YT:Z:CP
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'helper'
4
+
5
class TestBetterSam < Test::Unit::TestCase

  # Runs the given block on every record of @fs and compares its result
  # with the matching entry of +expected+. Also asserts that exactly
  # expected.size records were read, so that a reader yielding nothing
  # cannot pass silently.
  def assert_each_record(expected, label)
    seen = 0
    @fs.each_record do |record|
      assert_equal expected[seen], yield(record), "#{label}, record ##{seen + 1}"
      seen += 1
    end
    assert_equal expected.size, seen, "#{label}, number of records read"
  end

  context "BetterSam" do

    setup do
      # this is run before each test
      path = File.join(File.dirname(__FILE__), 'data', 'basic.sam')
      @fs = BetterSam.new path
    end

    should "read a SAM file" do
      # NOTE(review): record 6 has no NM tag in basic.sam; the value 0 is
      # kept from the original expectation and depends on what the native
      # parser stores when the tag is absent - confirm.
      expected = [
        {:rname => "nivara_3s", :pos => 1572276, :xm => 4, :nm => 0},
        {:rname => "chromosome03", :pos => 1789384, :xm => 3, :nm => 8},
        {:rname => "chromosome03", :pos => 1789377, :xm => 6, :nm => 8},
        {:rname => "nivara_3s", :pos => 1572267, :xm => 4, :nm => 9},
        {:rname => "chromosome03", :pos => 1789378, :xm => 4, :nm => 13},
        {:rname => "Sb02g000720.1", :pos => 1186, :xm => 0, :nm => 0}
      ]
      assert_each_record(expected, "record fields") do |record|
        {:rname => record.rname, :pos => record.pos,
         :xm => record.tags.xm, :nm => record.tags.nm}
      end
    end

    should "detect a paired read" do
      expected = [true, true, true, true, true, true]
      assert_each_record(expected, "paired") { |record| record.read_paired? }
    end

    should "detect reverse strand" do
      expected = [true, true, true, true, true, false]
      assert_each_record(expected, "reverse strand") { |record| record.read_reverse_strand? }
    end

    should "detect the first read in a pair" do
      expected = [true, true, false, false, false, true]
      assert_each_record(expected, "first in pair") { |record| record.first_in_pair? }
    end

    should "detect non-primary alignments" do
      # only record 6 (flag 355) has the 0x100 bit set
      expected = [true, true, true, true, true, false]
      assert_each_record(expected, "primary alignment") { |record| record.primary_aln? }
    end

    should "get the mapping position" do
      expected = [1572276, 1789384, 1789377, 1572267, 1789378, 1186]
      assert_each_record(expected, "position") { |record| record.pos }
    end

    should "get the end position" do
      # pos plus the summed M and D lengths of each CIGAR string:
      # 100M, 4M5I91M, 4M1I2M1D93M, 5M2D3M2I3M1I86M, 7M4I3M5I81M, 71M
      expected = [1572276 + 100, 1789384 + 95, 1789377 + 100,
                  1572267 + 99, 1789378 + 91, 1186 + 71]
      assert_each_record(expected, "end position") { |record| record.endpos }
    end

    should "detect exact matches" do
      # Records 2-5 carry a non-zero NM tag, so they cannot be exact matches.
      # NOTE(review): record 6 has no NM tag; false is kept from the original
      # expectation, but it cannot hold together with :nm => 0 in
      # "read a SAM file" (CIGAR 71M equals the read length) - confirm what
      # the native parser reports for a missing tag.
      expected = [true, false, false, false, false, false]
      assert_each_record(expected, "exact match") { |record| record.exact_match? }
    end

    should "get the first base" do
      expected = ["A", "A", "G", "G", "G", "C"]
      assert_each_record(expected, "first base") { |record| record.get_base_at(0) }
    end

    should "parse cigar strings correctly" do
      # number of operations in each CIGAR string of basic.sam
      expected = [1, 3, 5, 7, 5, 1]
      assert_each_record(expected, "cigar operations") do |record|
        record.parse_cigar
        record.cigar_list.size
      end
    end

  end
end
metadata CHANGED
@@ -1,109 +1,122 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bettersam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
8
8
  - Chris Boursnell
9
- - Jesse Rodriguez
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2014-08-05 00:00:00.000000000 Z
12
+ date: 2014-08-13 00:00:00.000000000 Z
14
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
15
28
  - !ruby/object:Gem::Dependency
16
29
  name: simplecov
17
30
  requirement: !ruby/object:Gem::Requirement
18
31
  requirements:
19
- - - ~>
32
+ - - "~>"
20
33
  - !ruby/object:Gem::Version
21
34
  version: '0.8'
22
- - - '>='
35
+ - - ">="
23
36
  - !ruby/object:Gem::Version
24
37
  version: 0.8.2
25
38
  type: :development
26
39
  prerelease: false
27
40
  version_requirements: !ruby/object:Gem::Requirement
28
41
  requirements:
29
- - - ~>
42
+ - - "~>"
30
43
  - !ruby/object:Gem::Version
31
44
  version: '0.8'
32
- - - '>='
45
+ - - ">="
33
46
  - !ruby/object:Gem::Version
34
47
  version: 0.8.2
35
48
  - !ruby/object:Gem::Dependency
36
49
  name: rake
37
50
  requirement: !ruby/object:Gem::Requirement
38
51
  requirements:
39
- - - ~>
52
+ - - "~>"
40
53
  - !ruby/object:Gem::Version
41
54
  version: '10.3'
42
- - - '>='
55
+ - - ">="
43
56
  - !ruby/object:Gem::Version
44
57
  version: 10.3.2
45
58
  type: :development
46
59
  prerelease: false
47
60
  version_requirements: !ruby/object:Gem::Requirement
48
61
  requirements:
49
- - - ~>
62
+ - - "~>"
50
63
  - !ruby/object:Gem::Version
51
64
  version: '10.3'
52
- - - '>='
65
+ - - ">="
53
66
  - !ruby/object:Gem::Version
54
67
  version: 10.3.2
55
68
  - !ruby/object:Gem::Dependency
56
69
  name: turn
57
70
  requirement: !ruby/object:Gem::Requirement
58
71
  requirements:
59
- - - ~>
72
+ - - "~>"
60
73
  - !ruby/object:Gem::Version
61
74
  version: '0.9'
62
- - - '>='
75
+ - - ">="
63
76
  - !ruby/object:Gem::Version
64
77
  version: 0.9.7
65
78
  type: :development
66
79
  prerelease: false
67
80
  version_requirements: !ruby/object:Gem::Requirement
68
81
  requirements:
69
- - - ~>
82
+ - - "~>"
70
83
  - !ruby/object:Gem::Version
71
84
  version: '0.9'
72
- - - '>='
85
+ - - ">="
73
86
  - !ruby/object:Gem::Version
74
87
  version: 0.9.7
75
88
  - !ruby/object:Gem::Dependency
76
89
  name: shoulda-context
77
90
  requirement: !ruby/object:Gem::Requirement
78
91
  requirements:
79
- - - ~>
92
+ - - "~>"
80
93
  - !ruby/object:Gem::Version
81
94
  version: '1.2'
82
- - - '>='
95
+ - - ">="
83
96
  - !ruby/object:Gem::Version
84
97
  version: 1.2.1
85
98
  type: :development
86
99
  prerelease: false
87
100
  version_requirements: !ruby/object:Gem::Requirement
88
101
  requirements:
89
- - - ~>
102
+ - - "~>"
90
103
  - !ruby/object:Gem::Version
91
104
  version: '1.2'
92
- - - '>='
105
+ - - ">="
93
106
  - !ruby/object:Gem::Version
94
107
  version: 1.2.1
95
108
  - !ruby/object:Gem::Dependency
96
109
  name: coveralls
97
110
  requirement: !ruby/object:Gem::Requirement
98
111
  requirements:
99
- - - ~>
112
+ - - "~>"
100
113
  - !ruby/object:Gem::Version
101
114
  version: '0.7'
102
115
  type: :development
103
116
  prerelease: false
104
117
  version_requirements: !ruby/object:Gem::Requirement
105
118
  requirements:
106
- - - ~>
119
+ - - "~>"
107
120
  - !ruby/object:Gem::Version
108
121
  version: '0.7'
109
122
  description: Extended SAM (Sequence Alignment/Map) file parsing
@@ -112,12 +125,16 @@ executables: []
112
125
  extensions: []
113
126
  extra_rdoc_files: []
114
127
  files:
128
+ - LICENSE
129
+ - README.md
115
130
  - Rakefile
116
131
  - lib/bettersam.rb
117
- - test/test_test.rb
132
+ - lib/bettersam/library.rb
133
+ - lib/bettersam/samrecord.rb
134
+ - lib/bettersam/samtagset.rb
135
+ - test/data/basic.sam
118
136
  - test/helper.rb
119
- - README.md
120
- - LICENSE
137
+ - test/test_bettersam.rb
121
138
  homepage: https://github.com/blahah/bettersam
122
139
  licenses:
123
140
  - MIT
@@ -128,19 +145,18 @@ require_paths:
128
145
  - lib
129
146
  required_ruby_version: !ruby/object:Gem::Requirement
130
147
  requirements:
131
- - - '>='
148
+ - - ">="
132
149
  - !ruby/object:Gem::Version
133
150
  version: '0'
134
151
  required_rubygems_version: !ruby/object:Gem::Requirement
135
152
  requirements:
136
- - - '>='
153
+ - - ">="
137
154
  - !ruby/object:Gem::Version
138
155
  version: '0'
139
156
  requirements: []
140
157
  rubyforge_project:
141
- rubygems_version: 2.1.4
158
+ rubygems_version: 2.2.2
142
159
  signing_key:
143
160
  specification_version: 4
144
161
  summary: Extended SAM file parsing
145
162
  test_files: []
146
- has_rdoc:
data/test/test_test.rb DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'helper'
4
-
5
- class TestBetterSam < Test::Unit::TestCase
6
-
7
- context "BetterSam" do
8
-
9
- setup do
10
- # this is run before each test
11
- @l1 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 nivara_3s 1572276 40 100M = 1571527 -849 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-24 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:0 MD:Z:1T1G3T0A91 YS:i:-5 YT:Z:DP")
12
- @l2 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 chromosome03 1789384 24 4M5I91M = 1788782 -697 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-38 XN:i:0 XM:i:3 XO:i:1 XG:i:5 NM:i:8 MD:Z:0C1T6G85 YS:i:-5 YT:Z:DP")
13
- @l3 = BetterSam.new("FCC00CKABXX:2:1101:19524:66398#CAGATCAT 145 chromosome03 1789377 23 4M1I2M1D93M = 1788766 -711 GGAGGATCGGGCCTCGTGGGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGA Bc`aaT\Y_]RLMKKMHEMV_T[Y[deaeeeaadbaaa\_feecedddddadfcegdcXdggcggggggggg`gfbecbcggggggggeggggggggggg AS:i:-51 XN:i:0 XM:i:6 XO:i:2 XG:i:2 NM:i:8 MD:Z:2T0C2^A2T0A5G1A81 YS:i:0 YT:Z:DP")
14
- @l4l = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 nivara_3s 1572267 23 5M2D3M2I3M1I86M = 1571498 -868 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-53 XN:i:0 XM:i:4 XO:i:3 XG:i:5 NM:i:9 MD:Z:2G2^TG3T5T0A81 YS:i:0 YT:Z:DP")
15
- @l4r = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 chromosome03 1789378 23 7M4I3M5I81M = 1788753 -716 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-59 XN:i:0 XM:i:4 XO:i:2 XG:i:9 NM:i:13 MD:Z:3A1A2T6G75 YS:i:0 YT:Z:DP")
16
- @l5l = BetterSam.new("FCC2HFRACXX:7:2314:9299:67450#TGACCAAT 355 Sb02g000720.1 1186 18 71M = 1238 -150 CGTCATCTTCTCTCATATATTTGTATCACCCATCCATCCATCTGCCTTCGATATGCATCTCCACTCCGCCG __^cc]^\`eegea`ffdfghhfd]eghhfffef``degfhf_^gdfhfg_fghhhfdhffdfhffbeWcW AS:i:142 XN:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:71 YS:i:44 YT:Z:CP") # NM:i:0
17
- end
18
-
19
- should "be a paired read" do
20
- assert @l1.read_paired?
21
- end
22
-
23
- should "be on the reverse strand" do
24
- assert @l1.read_reverse_strand?
25
- end
26
-
27
- should "be the first pair" do
28
- assert @l1.first_in_pair?
29
- end
30
-
31
- should "get the mappping position" do
32
- assert @l1.pos == 1572276
33
- end
34
-
35
- should "get the end position" do
36
- assert @l1.endpos == 1572376
37
- end
38
-
39
- should "get the end position too" do
40
- assert @l2.endpos == 1789479, "this is #{@l1.endpos} but should be 1789479"
41
- end
42
-
43
- should "be exact match" do
44
- assert @l1.exact_match?
45
- end
46
-
47
- should "contain snp" do
48
- assert @l1.contains_snp?(1572283)
49
- end
50
-
51
- should "give A" do
52
- assert @l1.get_base_at(0)=="A", "this is #{@l1.get_base_at(0)}, but should be A"
53
- end
54
-
55
- should "parse the cigar string correctly" do
56
- @l3.parse_cigar
57
- assert @l3.cigar_list.size == 5
58
- end
59
-
60
- should "parse another cigar string correctly" do
61
- @l4l.parse_cigar
62
- assert @l4l.cigar_list.size == 7
63
- end
64
-
65
- should "mark a snp" do
66
- assert @l1.mark_snp(1572283)==7, "this is #{@l1.mark_snp(1572283)}, but i think it should be 7"
67
- end
68
-
69
- should "mark yet another snp" do
70
- assert @l4l.mark_snp(1572283)==17, "this is #{@l4l.mark_snp(1572283)}, but i think it should be 17"
71
- end
72
-
73
- should "transfer a snp from one object to another" do
74
- @l1.mark_snp(1572283)
75
- @l2.transfer_snp(@l1)
76
- assert @l2.snp == 7, "this is #{@l2.snp}, but i think it should be 7"
77
- end
78
-
79
- should "mark another snp" do
80
- assert @l2.mark_snp(1789386)==2, "this is #{@l2.mark_snp(1789386)}, but i think it should be 3"
81
- end
82
-
83
- should "mark a third snp" do
84
- assert @l3.mark_snp(1789386)==9, "this is #{@l3.mark_snp(1789386)}, but i think it should be 9"
85
- end
86
-
87
- should "find the location of a snp on the genome" do
88
- @l4l.mark_snp(1572283)
89
- @l4r.transfer_snp(@l4l)
90
- assert @l4r.put_snp==1789386, "this is #{@l4r.put_snp}, but I think it should be 1789386"
91
- end
92
-
93
- should "not be primary alignment" do
94
- assert !@l5l.primary_aln?
95
- end
96
-
97
- should "get the edit distance" do
98
- assert_equal 8, @l2.tags[:NM]
99
- end
100
-
101
- should "fail to get the edit distance" do
102
- assert_equal nil, @l5l.tags[:NM]
103
- end
104
-
105
- end
106
- end