bettersam 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: adf55f7adc240db5cee1c5f188f3abd5798fbb98
4
- data.tar.gz: 49f7b0e878392c32e49f18420777987fdcb252fc
3
+ metadata.gz: ebf104198d7b3ce4721ee95da8e6364049652351
4
+ data.tar.gz: 65fc1835e6563e44febdf76f9cc1feb7cd0c16b0
5
5
  SHA512:
6
- metadata.gz: 74cbfbabd15fca7d5e9d3568b2f2fcb7ddb363f2af9df94db793f877a98565272db38a60000bef79b7b4324a2850afefc111d063148eccefa822ddb4cb6905b3
7
- data.tar.gz: b73440febce482e72fecc812e0afde3478743e128a9bf43f1f6842cd1e28fff1b70786b8fae13a9f60c46803b7ab4ce29e3e844d5d2880e82cab8c23f4407dc3
6
+ metadata.gz: 0fc83632508d5254d030ff3e960db2a6c162fa6297fa0468888d32f377b4e60c003c3d08ef700eb1fda1cf92fa6fa51e5a05f8435ba154728c1e6f71f768e125
7
+ data.tar.gz: c6bfd6f772918cdfb08efaac2af693d23c8938924862f5aa68a87077183576a9d4c4c950c8a018f964b8f461bd0eb8248840f72ec49a69c6fb91fa594117c341
data/Rakefile CHANGED
@@ -5,5 +5,15 @@ Rake::TestTask.new do |t|
5
5
  end
6
6
 
7
7
  desc "Run tests"
8
- task :default => :test
8
+ task :default => ["ext:build", :test]
9
9
 
10
+ namespace :ext do
11
+ desc "Build native extension"
12
+ task :build do
13
+ cd "ext"
14
+ ruby "mkrf_conf.rb"
15
+ sh "rake"
16
+ cd ".."
17
+ end
18
+
19
+ end
data/lib/bettersam.rb CHANGED
@@ -1,270 +1,34 @@
1
- class BetterSam
2
-
3
- # meanings of SAM flag components, with index i
4
- # being one more than the exponent 2 must be raised to to get the
5
- # value (i.e. value = 2^(i+1))
6
- $flags = [
7
- nil,
8
- 0x1, # 1. read paired
9
- 0x2, # 2. read mapped in proper pair (i.e. with acceptable insert size)
10
- 0x4, # 3. read unmapped
11
- 0x8, # 4. mate unmapped
12
- 0x10, # 5. read reverse strand
13
- 0x20, # 6. mate reverse strand
14
- 0x40, # 7. first in pair
15
- 0x80, # 8. second in pair
16
- 0x100, # 9. not primary alignment
17
- 0x200, # 10. read fails platform/vendor quality checks
18
- 0x400] # 11. read is PCR or optical duplicate
19
-
20
- public
21
- attr_accessor :name, :flag, :chrom, :pos, :mapq, :cigar, :mchrom, :mpos
22
- attr_accessor :insert, :seq, :qual, :tags, :length
23
- attr_accessor :snp
24
- attr_reader :cigar_list
25
-
26
- def initialize(line=nil)
27
- @tags = {}
28
- parse_line(line) unless line.nil?
29
- end
30
-
31
- def parse_line(line)
32
- return false if line[0] == "@"
33
-
34
- f = line.chomp.split("\t", -1)
35
- raise "SAM lines must have at least 11 fields (had #{f.size})" if f.size < 11
36
-
37
- # colnames = %w(1:name 2:flag 3:chr 4:pos 5:mapq 6:cigar 7:mchr 8:mpos 9:insrt 10:seq 11:qual)
38
-
39
- @name = f[0]
40
- @flag = int_or_raw(f[1])
41
- @chrom = f[2]
42
- @pos = int_or_neg1(f[3])
43
- @mapq = int_or_neg1(f[4])
44
- @cigar = f[5]
45
- @mchrom = f[6]
46
- @mpos = int_or_neg1(f[7])
47
- @insert = int_or_raw(f[8])
48
- @seq = f[9]
49
- @qual = f[10]
50
- @length = nil
51
-
52
- @tags = {}
53
- i = 11
54
- while i < f.size
55
- tag = f[i]
56
- i += 1
57
- a = tag.split(":")
58
- raise line if a.length != 3
59
- if a[1]=="i"
60
- @tags[a[0].to_sym] = a[2].to_i
61
- elsif a[1]=="Z"
62
- @tags[a[0].to_sym] = a[2]
63
- else
64
- @tags[a[0].to_sym] = a[2]
65
- end
66
- end
67
-
68
- return true;
69
- end
70
-
71
- # flag parsing convenience methods
72
-
73
- def read_paired?
74
- @flag & $flags[1] != 0
75
- end
76
-
77
- def read_properly_paired?
78
- @flag & $flags[2] != 0
79
- end
80
-
81
- def read_unmapped?
82
- @flag & $flags[3] != 0
83
- end
84
-
85
- def mate_unmapped?
86
- @flag & $flags[4] != 0
87
- end
88
-
89
- def read_reverse_strand?
90
- @flag & $flags[5] != 0
91
- end
92
-
93
- def mate_reverse_strand?
94
- @flag & $flags[6] != 0
95
- end
96
-
97
- def first_in_pair?
98
- @flag & $flags[7] != 0
99
- end
100
-
101
- def second_in_pair?
102
- @flag & $flags[8] !=0
103
- end
104
-
105
- def primary_aln?
106
- (@flag & $flags[9]) == 0
107
- end
108
-
109
- def quality_fail?
110
- @flag & $flags[10] != 0
111
- end
1
+ require 'ffi'
2
+ require 'bettersam/library'
3
+ require 'bettersam/samtagset'
4
+ require 'bettersam/samrecord'
112
5
 
113
- def pcr_duplicate?
114
- @flag & $flags[11] != 0
115
- end
116
-
117
- # pair convenience methods
118
-
119
- def both_mapped?
120
- !(self.read_unmapped? && self.mate_unmapped?)
121
- end
122
-
123
- def pair_opposite_strands?
124
- (!self.read_reverse_strand? && self.mate_reverse_strand?) ||
125
- (self.read_reverse_strand? && !self.mate_reverse_strand?)
126
- end
127
-
128
- def pair_same_strand?
129
- !self.pair_opposite_strands?
130
- end
131
-
132
- def edit_distance
133
- @tags[:NM]
134
- end
135
-
136
- def length
137
- @length = @seq.length if !@length
138
- return @length
139
- end
140
-
141
- # cigar parsing methods
142
-
143
- def exact_match?
144
- @tags[:NM]==0
145
- end
146
-
147
- def endpos
148
- if !@cigar_list
149
- self.parse_cigar
150
- end
151
- e = @pos
152
- @cigar_list.each do |h|
153
- a = h.to_a
154
- bases = a[0][0]
155
- match = a[0][1]
156
- if match =~ /[MD]/
157
- e += bases
158
- end
159
- end
160
- return e
161
- end
162
-
163
- def parse_cigar
164
- str = @cigar
165
- l = str.length
166
- @cigar_list = []
167
- while str.length>0
168
- if str =~ /([0-9]+[MIDNSHPX=]+)/
169
- @cigar_list << {$1[0..-2].to_i => $1[-1]}
170
- str = str.slice($1.length, l)
171
- else
172
- puts str
173
- end
174
- end
175
- end
6
+ class BetterSam
176
7
 
177
- # snp storing
8
+ extend FFI::Library
178
9
 
179
- def contains_snp?(snp)
180
- snp >= @pos and snp < self.endpos
181
- end
10
+ ffi_lib Library.load
11
+ attach_function :sam_iterator, [SAMRecord], :int
12
+ attr_accessor :file
182
13
 
183
- def mark_snp(snp)
184
- if self.contains_snp?(snp)
185
- if !@cigar_list
186
- self.parse_cigar
187
- end
188
- p = @pos
189
- s = snp
190
- @cigar_list.each do |h|
191
- if p > s and s >= @pos
192
- @snp = s - @pos
193
- else
194
- a = h.to_a
195
- bases = a[0][0]
196
- match = a[0][1]
197
- if match == "M"
198
- p += bases
199
- elsif match == "I"
200
- s += bases
201
- elsif match == "D"
202
- s -= bases
203
- end
204
- end
205
- end
206
- if p > s and s >= @pos
207
- @snp = s - @pos
208
- end
209
- end
210
- @snp
14
+ def initialize file
15
+ self.file = file
211
16
  end
212
17
 
213
- def transfer_snp(bs) # load in another bettersam object
214
- if !self.read_unmapped? and !bs.read_unmapped?
215
- if (self.read_reverse_strand? and bs.read_reverse_strand?) or (!self.read_reverse_strand? and !bs.read_reverse_strand?)
216
- @snp = bs.snp
217
- else
218
- end
18
+ def each_record &block
19
+ if !File.exist?(self.file)
20
+ raise ArgumentError, "File #{self.file} does not exist"
219
21
  end
22
+ record = SAMRecord.new
23
+ record[:filename] = FFI::MemoryPointer.from_string(self.file)
24
+ result = nil
25
+ result = parse_sam(record, &block)
220
26
  end
221
27
 
222
- def put_snp # find the location of a snp on the genome
223
- if @snp
224
- if !@cigar_list
225
- self.parse_cigar
226
- end
227
- s = @snp
228
- p = 0
229
- @cigar_list.each do |h|
230
- if p > s
231
- return s+@pos
232
- else
233
- a = h.to_a
234
- bases = a[0][0]
235
- match = a[0][1]
236
- if match=="M"
237
- p += bases
238
- elsif match=="D"
239
- s += bases
240
- elsif match=="I"
241
- s -= bases
242
- end
243
- end
244
- end
245
- if p > s
246
- return s+@pos
247
- end
248
- else
249
- puts "need to run mark_snp and transfer_snp first"
250
- return nil
28
+ def parse_sam(record, &block)
29
+ while (result = BetterSam::sam_iterator(record)) == 1
30
+ yield record
251
31
  end
252
- return -1
253
32
  end
254
33
 
255
- def get_base_at(p)
256
- @seq[p]
257
- end
258
-
259
- private
260
-
261
- def int_or_neg1(x)
262
- Integer(x) rescue -1
263
- end
264
-
265
- def int_or_raw(x)
266
- Integer(x) rescue x
267
- end
268
-
269
-
270
34
  end
@@ -0,0 +1,25 @@
1
+ class BetterSam
2
+ class Library
3
+
4
+ require 'rbconfig'
5
+
6
+ def self.lib_extension
7
+ case RbConfig::CONFIG['host_os']
8
+ when /linux/ then return 'so'
9
+ when /darwin/ then return 'dylib'
10
+ else raise NotImplementedError,
11
+ "Native library is not available for Windows platform"
12
+ end
13
+ end
14
+
15
+ # Build the path to the shared library for the operating system in use
16
+ # @return [String] the absolute path for the filename of the shared library
17
+ # @note this method is called automatically when the module is loaded
18
+ def self.load
19
+ path = File.expand_path File.dirname(__FILE__)
20
+ path.gsub!(/lib\/bettersam/,'ext')
21
+ File.join(path,"libbettersam.#{self.lib_extension}")
22
+ end
23
+
24
+ end
25
+ end
@@ -0,0 +1,225 @@
1
+ class BetterSam
2
+
3
+ class SAMRecord < FFI::Struct
4
+
5
+ # meanings of SAM flag components, with index i
6
+ # being one more than the exponent 2 must be raised to to get the
7
+ # value (i.e. value = 2^(i-1))
8
+ $flags = [
9
+ nil,
10
+ 0x1, # 1. read paired
11
+ 0x2, # 2. read mapped in proper pair (i.e. with acceptable insert size)
12
+ 0x4, # 3. read unmapped
13
+ 0x8, # 4. mate unmapped
14
+ 0x10, # 5. read reverse strand
15
+ 0x20, # 6. mate reverse strand
16
+ 0x40, # 7. first in pair
17
+ 0x80, # 8. second in pair
18
+ 0x100, # 9. not primary alignment
19
+ 0x200, # 10. read fails platform/vendor quality checks
20
+ 0x400 # 11. read is PCR or optical duplicate
21
+ ]
22
+
23
+ # NOTE: fields must be in the same order
24
+ # here as they are in the struct definition
25
+ # in bettersam.h
26
+ layout :qname, :pointer,
27
+ :flag, :int,
28
+ :rname, :pointer,
29
+ :pos, :int,
30
+ :mapq, :int,
31
+ :cigar, :pointer,
32
+ :rnext, :pointer,
33
+ :pnext, :int,
34
+ :tlen, :int,
35
+ :seq, :pointer,
36
+ :qual, :pointer,
37
+ :tags, :pointer,
38
+ :filename, :pointer,
39
+ :line, :pointer,
40
+ :file, :pointer
41
+
42
+ attr_accessor :insert, :length, :snp
43
+ attr_reader :cigar_list
44
+
45
+ def qname
46
+ self[:qname].read_string
47
+ end
48
+
49
+ def name
50
+ qname
51
+ end
52
+
53
+ def flag
54
+ self[:flag]
55
+ end
56
+
57
+ def rname
58
+ self[:rname].read_string
59
+ end
60
+
61
+ def chrome
62
+ rname
63
+ end
64
+
65
+ def pos
66
+ self[:pos]
67
+ end
68
+
69
+ def mapq
70
+ self[:mapq]
71
+ end
72
+
73
+ def cigar
74
+ self[:cigar].read_string
75
+ end
76
+
77
+ def rnext
78
+ self[:rnext].read_string
79
+ end
80
+
81
+ def mchrom
82
+ rnext
83
+ end
84
+
85
+ def pnext
86
+ self[:pnext]
87
+ end
88
+
89
+ def mpos
90
+ pnext
91
+ end
92
+
93
+ def tlen
94
+ self[:tlen]
95
+ end
96
+
97
+ def seq
98
+ self[:seq].read_string
99
+ end
100
+
101
+ def qual
102
+ self[:qual].read_string
103
+ end
104
+
105
+ # returns a SAMTagSet object
106
+ def tags
107
+ if !@tags
108
+ @tags = BetterSam::SAMTagSet.new(self[:tags])
109
+ end
110
+ @tags
111
+ end
112
+
113
+ # basic flag convenience methods
114
+
115
+ def read_paired?
116
+ flag & $flags[1] != 0
117
+ end
118
+
119
+ def read_properly_paired?
120
+ flag & $flags[2] != 0
121
+ end
122
+
123
+ def read_unmapped?
124
+ flag & $flags[3] != 0
125
+ end
126
+
127
+ def mate_unmapped?
128
+ flag & $flags[4] != 0
129
+ end
130
+
131
+ def read_reverse_strand?
132
+ flag & $flags[5] != 0
133
+ end
134
+
135
+ def mate_reverse_strand?
136
+ flag & $flags[6] != 0
137
+ end
138
+
139
+ def first_in_pair?
140
+ flag & $flags[7] != 0
141
+ end
142
+
143
+ def second_in_pair?
144
+ flag & $flags[8] !=0
145
+ end
146
+
147
+ def primary_aln?
148
+ (flag & $flags[9]) == 0
149
+ end
150
+
151
+ def quality_fail?
152
+ flag & $flags[10] != 0
153
+ end
154
+
155
+ def pcr_duplicate?
156
+ flag & $flags[11] != 0
157
+ end
158
+
159
+ # pair convenience methods
160
+
161
+ def both_mapped?
162
+ !(read_unmapped? && mate_unmapped?)
163
+ end
164
+
165
+ def pair_opposite_strands?
166
+ (!read_reverse_strand? && mate_reverse_strand?) ||
167
+ (read_reverse_strand? && !mate_reverse_strand?)
168
+ end
169
+
170
+ def pair_same_strand?
171
+ !pair_opposite_strands?
172
+ end
173
+
174
+ def edit_distance
175
+ tags.nm
176
+ end
177
+
178
+ def length
179
+ @length = seq.length if !@length
180
+ return @length
181
+ end
182
+
183
+ # cigar parsing methods
184
+
185
+ def exact_match?
186
+ tags.nm==0 && cigar=="#{seq.length}M"
187
+ end
188
+
189
+ def endpos
190
+ if !@cigar_list
191
+ parse_cigar
192
+ end
193
+ e = pos
194
+ @cigar_list.each do |h|
195
+ a = h.to_a
196
+ bases = a[0][0]
197
+ match = a[0][1]
198
+ if match =~ /[MD]/
199
+ e += bases
200
+ end
201
+ end
202
+ e
203
+ end
204
+
205
+ def parse_cigar
206
+ str = cigar
207
+ l = str.length
208
+ @cigar_list = []
209
+ while str.length>0
210
+ if str =~ /([0-9]+[MIDNSHPX=]+)/
211
+ @cigar_list << {$1[0..-2].to_i => $1[-1]}
212
+ str = str.slice($1.length, l)
213
+ else
214
+ puts str
215
+ end
216
+ end
217
+ end
218
+
219
+ def get_base_at p
220
+ seq[p]
221
+ end
222
+
223
+ end
224
+
225
+ end
@@ -0,0 +1,23 @@
1
+ class BetterSam
2
+
3
+ class SAMTagSet < FFI::Struct
4
+
5
+ # NOTE: fields must be in the same order
6
+ # here as they are in the struct definition
7
+ # in bettersam.h
8
+ layout :xm, :int,
9
+ :nm, :int
10
+
11
+ # returns the number of mismatches in the alignment
12
+ def xm
13
+ self[:xm]
14
+ end
15
+
16
+ # returns the edit distance between query and target
17
+ def nm
18
+ self[:nm]
19
+ end
20
+
21
+ end
22
+
23
+ end
@@ -0,0 +1,6 @@
1
+ FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 nivara_3s 1572276 40 100M = 1571527 -849 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-24 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:0 MD:Z:1T1G3T0A91 YS:i:-5 YT:Z:DP
2
+ FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 chromosome03 1789384 24 4M5I91M = 1788782 -697 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-38 XN:i:0 XM:i:3 XO:i:1 XG:i:5 NM:i:8 MD:Z:0C1T6G85 YS:i:-5 YT:Z:DP
3
+ FCC00CKABXX:2:1101:19524:66398#CAGATCAT 145 chromosome03 1789377 23 4M1I2M1D93M = 1788766 -711 GGAGGATCGGGCCTCGTGGGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGA Bc`aaT\Y_]RLMKKMHEMV_T[Y[deaeeeaadbaaa\_feecedddddadfcegdcXdggcggggggggg`gfbecbcggggggggeggggggggggg AS:i:-51 XN:i:0 XM:i:6 XO:i:2 XG:i:2 NM:i:8 MD:Z:2T0C2^A2T0A5G1A81 YS:i:0 YT:Z:DP
4
+ FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 nivara_3s 1572267 23 5M2D3M2I3M1I86M = 1571498 -868 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-53 XN:i:0 XM:i:4 XO:i:3 XG:i:5 NM:i:9 MD:Z:2G2^TG3T5T0A81 YS:i:0 YT:Z:DP
5
+ FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 chromosome03 1789378 23 7M4I3M5I81M = 1788753 -716 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-59 XN:i:0 XM:i:4 XO:i:2 XG:i:9 NM:i:13 MD:Z:3A1A2T6G75 YS:i:0 YT:Z:DP
6
+ FCC2HFRACXX:7:2314:9299:67450#TGACCAAT 355 Sb02g000720.1 1186 18 71M = 1238 -150 CGTCATCTTCTCTCATATATTTGTATCACCCATCCATCCATCTGCCTTCGATATGCATCTCCACTCCGCCG __^cc]^\`eegea`ffdfghhfd]eghhfffef``degfhf_^gdfhfg_fghhhfdhffdfhffbeWcW AS:i:142 XN:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:71 YS:i:44 YT:Z:CP
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'helper'
4
+
5
+ class TestBetterSam < Test::Unit::TestCase
6
+
7
+ context "BetterSam" do
8
+
9
+ setup do
10
+ path = File.join(File.dirname(__FILE__), 'data', 'basic.sam')
11
+ @fs = BetterSam.new path
12
+ # this is run before each test
13
+ @l1 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 nivara_3s 1572276 40 100M = 1571527 -849 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-24 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:0 MD:Z:1T1G3T0A91 YS:i:-5 YT:Z:DP")
14
+ @l2 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 chromosome03 1789384 24 4M5I91M = 1788782 -697 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-38 XN:i:0 XM:i:3 XO:i:1 XG:i:5 NM:i:8 MD:Z:0C1T6G85 YS:i:-5 YT:Z:DP")
15
+ @l3 = BetterSam.new("FCC00CKABXX:2:1101:19524:66398#CAGATCAT 145 chromosome03 1789377 23 4M1I2M1D93M = 1788766 -711 GGAGGATCGGGCCTCGTGGGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGA Bc`aaT\Y_]RLMKKMHEMV_T[Y[deaeeeaadbaaa\_feecedddddadfcegdcXdggcggggggggg`gfbecbcggggggggeggggggggggg AS:i:-51 XN:i:0 XM:i:6 XO:i:2 XG:i:2 NM:i:8 MD:Z:2T0C2^A2T0A5G1A81 YS:i:0 YT:Z:DP")
16
+ @l4l = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 nivara_3s 1572267 23 5M2D3M2I3M1I86M = 1571498 -868 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-53 XN:i:0 XM:i:4 XO:i:3 XG:i:5 NM:i:9 MD:Z:2G2^TG3T5T0A81 YS:i:0 YT:Z:DP")
17
+ @l4r = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 chromosome03 1789378 23 7M4I3M5I81M = 1788753 -716 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-59 XN:i:0 XM:i:4 XO:i:2 XG:i:9 NM:i:13 MD:Z:3A1A2T6G75 YS:i:0 YT:Z:DP")
18
+ @l5l = BetterSam.new("FCC2HFRACXX:7:2314:9299:67450#TGACCAAT 355 Sb02g000720.1 1186 18 71M = 1238 -150 CGTCATCTTCTCTCATATATTTGTATCACCCATCCATCCATCTGCCTTCGATATGCATCTCCACTCCGCCG __^cc]^\`eegea`ffdfghhfd]eghhfffef``degfhf_^gdfhfg_fghhhfdhffdfhffbeWcW AS:i:142 XN:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:71 YS:i:44 YT:Z:CP") # NM:i:0
19
+ end
20
+
21
+ should "read a SAM file" do
22
+ expected = [
23
+ {:rname => "nivara_3s", :pos => 1572276, :xm => 4, :nm => 0},
24
+ {:rname => "chromosome03", :pos => 1789384, :xm => 3, :nm => 8},
25
+ {:rname => "chromosome03", :pos => 1789377, :xm => 6, :nm => 8},
26
+ {:rname => "nivara_3s", :pos => 1572267, :xm => 4, :nm => 9},
27
+ {:rname => "chromosome03", :pos => 1789378, :xm => 4, :nm => 13},
28
+ {:rname => "Sb02g000720.1", :pos => 1186, :xm => 0, :nm => 0}
29
+ ]
30
+ i = 0
31
+ @fs.each_record do |record|
32
+ assert_equal expected[i][:rname], record.rname, "chromosome"
33
+ assert_equal expected[i][:pos], record.pos, "position"
34
+ assert_equal expected[i][:xm], record.tags.xm, "mismatches"
35
+ assert_equal expected[i][:nm], record.tags.nm, "edit distance"
36
+ i += 1
37
+ end
38
+ end
39
+
40
+ should "detect a paired read" do
41
+ expected = [true, true, true, true, true, true]
42
+ i = 0
43
+ @fs.each_record do |record|
44
+ assert_equal expected[i], record.read_paired?, "record ##{i+1}"
45
+ i += 1
46
+ end
47
+ end
48
+
49
+ should "detect reverse strand" do
50
+ expected = [true, true, true, true, true, false]
51
+ i = 0
52
+ @fs.each_record do |record|
53
+ assert_equal expected[i], record.read_reverse_strand?, "record ##{i+1}"
54
+ i += 1
55
+ end
56
+ end
57
+
58
+ should "detect the first read in a pair" do
59
+ expected = [true, true, false, false, false, true]
60
+ i = 0
61
+ @fs.each_record do |record|
62
+ assert_equal expected[i], record.first_in_pair?, "record ##{i+1}"
63
+ i += 1
64
+ end
65
+ end
66
+
67
+ should "get the mapping position" do
68
+ expected = [1572276, 1789384, 1789377, 1572267, 1789378, 1186]
69
+ i = 0
70
+ @fs.each_record do |record|
71
+ assert_equal expected[i], record.pos, "record ##{i+1}"
72
+ i += 1
73
+ end
74
+ end
75
+
76
+ should "get the end position" do
77
+ expected = [1572276, 1789384, 1789377, 1572267, 1789378, 1186]
78
+ expected = expected.map { |x| x + 100 }
79
+ i = 0
80
+ @fs.each_record do |record|
81
+ assert_equal expected[i], record.endpos, "record ##{i+1}"
82
+ i += 1
83
+ end
84
+ end
85
+
86
+ should "detect exact matches" do
87
+ expected = [true, true, true, true, true, false]
88
+ i = 0
89
+ @fs.each_record do |record|
90
+ assert_equal expected[i], record.exact_match?, "record ##{i+1}"
91
+ i += 1
92
+ end
93
+ end
94
+
95
+ # should "give A" do
96
+ # assert @l1.get_base_at(0)=="A", "this is #{@l1.get_base_at(0)}, but should be A"
97
+ # end
98
+ #
99
+ # should "parse cigar strings correctly" do
100
+ # @l3.parse_cigar
101
+ # assert @l3.cigar_list.size == 5
102
+ # end
103
+ #
104
+ # should "parse another cigar string correctly" do
105
+ # @l4l.parse_cigar
106
+ # assert @l4l.cigar_list.size == 7
107
+ # end
108
+ #
109
+ # should "not be primary alignment" do
110
+ # assert !@l5l.primary_aln?
111
+ # end
112
+ #
113
+ # should "get the edit distance" do
114
+ # assert_equal 8, @l2.tags[:NM]
115
+ # end
116
+ #
117
+ # should "fail to get the edit distance" do
118
+ # assert_equal nil, @l5l.tags[:NM]
119
+ # end
120
+
121
+ end
122
+ end
metadata CHANGED
@@ -1,109 +1,122 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bettersam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
8
8
  - Chris Boursnell
9
- - Jesse Rodriguez
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2014-08-05 00:00:00.000000000 Z
12
+ date: 2014-08-13 00:00:00.000000000 Z
14
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
15
28
  - !ruby/object:Gem::Dependency
16
29
  name: simplecov
17
30
  requirement: !ruby/object:Gem::Requirement
18
31
  requirements:
19
- - - ~>
32
+ - - "~>"
20
33
  - !ruby/object:Gem::Version
21
34
  version: '0.8'
22
- - - '>='
35
+ - - ">="
23
36
  - !ruby/object:Gem::Version
24
37
  version: 0.8.2
25
38
  type: :development
26
39
  prerelease: false
27
40
  version_requirements: !ruby/object:Gem::Requirement
28
41
  requirements:
29
- - - ~>
42
+ - - "~>"
30
43
  - !ruby/object:Gem::Version
31
44
  version: '0.8'
32
- - - '>='
45
+ - - ">="
33
46
  - !ruby/object:Gem::Version
34
47
  version: 0.8.2
35
48
  - !ruby/object:Gem::Dependency
36
49
  name: rake
37
50
  requirement: !ruby/object:Gem::Requirement
38
51
  requirements:
39
- - - ~>
52
+ - - "~>"
40
53
  - !ruby/object:Gem::Version
41
54
  version: '10.3'
42
- - - '>='
55
+ - - ">="
43
56
  - !ruby/object:Gem::Version
44
57
  version: 10.3.2
45
58
  type: :development
46
59
  prerelease: false
47
60
  version_requirements: !ruby/object:Gem::Requirement
48
61
  requirements:
49
- - - ~>
62
+ - - "~>"
50
63
  - !ruby/object:Gem::Version
51
64
  version: '10.3'
52
- - - '>='
65
+ - - ">="
53
66
  - !ruby/object:Gem::Version
54
67
  version: 10.3.2
55
68
  - !ruby/object:Gem::Dependency
56
69
  name: turn
57
70
  requirement: !ruby/object:Gem::Requirement
58
71
  requirements:
59
- - - ~>
72
+ - - "~>"
60
73
  - !ruby/object:Gem::Version
61
74
  version: '0.9'
62
- - - '>='
75
+ - - ">="
63
76
  - !ruby/object:Gem::Version
64
77
  version: 0.9.7
65
78
  type: :development
66
79
  prerelease: false
67
80
  version_requirements: !ruby/object:Gem::Requirement
68
81
  requirements:
69
- - - ~>
82
+ - - "~>"
70
83
  - !ruby/object:Gem::Version
71
84
  version: '0.9'
72
- - - '>='
85
+ - - ">="
73
86
  - !ruby/object:Gem::Version
74
87
  version: 0.9.7
75
88
  - !ruby/object:Gem::Dependency
76
89
  name: shoulda-context
77
90
  requirement: !ruby/object:Gem::Requirement
78
91
  requirements:
79
- - - ~>
92
+ - - "~>"
80
93
  - !ruby/object:Gem::Version
81
94
  version: '1.2'
82
- - - '>='
95
+ - - ">="
83
96
  - !ruby/object:Gem::Version
84
97
  version: 1.2.1
85
98
  type: :development
86
99
  prerelease: false
87
100
  version_requirements: !ruby/object:Gem::Requirement
88
101
  requirements:
89
- - - ~>
102
+ - - "~>"
90
103
  - !ruby/object:Gem::Version
91
104
  version: '1.2'
92
- - - '>='
105
+ - - ">="
93
106
  - !ruby/object:Gem::Version
94
107
  version: 1.2.1
95
108
  - !ruby/object:Gem::Dependency
96
109
  name: coveralls
97
110
  requirement: !ruby/object:Gem::Requirement
98
111
  requirements:
99
- - - ~>
112
+ - - "~>"
100
113
  - !ruby/object:Gem::Version
101
114
  version: '0.7'
102
115
  type: :development
103
116
  prerelease: false
104
117
  version_requirements: !ruby/object:Gem::Requirement
105
118
  requirements:
106
- - - ~>
119
+ - - "~>"
107
120
  - !ruby/object:Gem::Version
108
121
  version: '0.7'
109
122
  description: Extended SAM (Sequence Alignment/Map) file parsing
@@ -112,12 +125,16 @@ executables: []
112
125
  extensions: []
113
126
  extra_rdoc_files: []
114
127
  files:
128
+ - LICENSE
129
+ - README.md
115
130
  - Rakefile
116
131
  - lib/bettersam.rb
117
- - test/test_test.rb
132
+ - lib/bettersam/library.rb
133
+ - lib/bettersam/samrecord.rb
134
+ - lib/bettersam/samtagset.rb
135
+ - test/data/basic.sam
118
136
  - test/helper.rb
119
- - README.md
120
- - LICENSE
137
+ - test/test_bettersam.rb
121
138
  homepage: https://github.com/blahah/bettersam
122
139
  licenses:
123
140
  - MIT
@@ -128,19 +145,18 @@ require_paths:
128
145
  - lib
129
146
  required_ruby_version: !ruby/object:Gem::Requirement
130
147
  requirements:
131
- - - '>='
148
+ - - ">="
132
149
  - !ruby/object:Gem::Version
133
150
  version: '0'
134
151
  required_rubygems_version: !ruby/object:Gem::Requirement
135
152
  requirements:
136
- - - '>='
153
+ - - ">="
137
154
  - !ruby/object:Gem::Version
138
155
  version: '0'
139
156
  requirements: []
140
157
  rubyforge_project:
141
- rubygems_version: 2.1.4
158
+ rubygems_version: 2.2.2
142
159
  signing_key:
143
160
  specification_version: 4
144
161
  summary: Extended SAM file parsing
145
162
  test_files: []
146
- has_rdoc:
data/test/test_test.rb DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'helper'
4
-
5
- class TestBetterSam < Test::Unit::TestCase
6
-
7
- context "BetterSam" do
8
-
9
- setup do
10
- # this is run before each test
11
- @l1 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 nivara_3s 1572276 40 100M = 1571527 -849 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-24 XN:i:0 XM:i:4 XO:i:0 XG:i:0 NM:i:0 MD:Z:1T1G3T0A91 YS:i:-5 YT:Z:DP")
12
- @l2 = BetterSam.new("FCC00CKABXX:2:1101:10117:6470#CAGATCAT 81 chromosome03 1789384 24 4M5I91M = 1788782 -697 AGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGACG ZbaX^_baX\_S]_ZdYccYebeffddZdbebdadc[bdVeeeceeeddggggggggggggggggegeggdffbfefegggggggggggggggggggggg AS:i:-38 XN:i:0 XM:i:3 XO:i:1 XG:i:5 NM:i:8 MD:Z:0C1T6G85 YS:i:-5 YT:Z:DP")
13
- @l3 = BetterSam.new("FCC00CKABXX:2:1101:19524:66398#CAGATCAT 145 chromosome03 1789377 23 4M1I2M1D93M = 1788766 -711 GGAGGATCGGGCCTCGTGGGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAACGAGCTCGA Bc`aaT\Y_]RLMKKMHEMV_T[Y[deaeeeaadbaaa\_feecedddddadfcegdcXdggcggggggggg`gfbecbcggggggggeggggggggggg AS:i:-51 XN:i:0 XM:i:6 XO:i:2 XG:i:2 NM:i:8 MD:Z:2T0C2^A2T0A5G1A81 YS:i:0 YT:Z:DP")
14
- @l4l = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 nivara_3s 1572267 23 5M2D3M2I3M1I86M = 1571498 -868 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-53 XN:i:0 XM:i:4 XO:i:3 XG:i:5 NM:i:9 MD:Z:2G2^TG3T5T0A81 YS:i:0 YT:Z:DP")
15
- @l4r = BetterSam.new("FCC00CKABXX:2:1101:16909:83925#CAGATCAT 145 chromosome03 1789378 23 7M4I3M5I81M = 1788753 -716 GTCCTCCAGGAGGATCGGGCCTCGTGAGCCGACGGTGAGCGAGTTGTTGTTGTTCCATACGGGGGCGCCGGAGTTGGTGCTCCACAGCGGGCCGTTGAAC BBBBBB_Z`cU]^SZS][]USKV[L`ac`dedeageeefagegagffdd`egedgggedgggggggdggggggggefeeeQgeagggggggggggggggg AS:i:-59 XN:i:0 XM:i:4 XO:i:2 XG:i:9 NM:i:13 MD:Z:3A1A2T6G75 YS:i:0 YT:Z:DP")
16
- @l5l = BetterSam.new("FCC2HFRACXX:7:2314:9299:67450#TGACCAAT 355 Sb02g000720.1 1186 18 71M = 1238 -150 CGTCATCTTCTCTCATATATTTGTATCACCCATCCATCCATCTGCCTTCGATATGCATCTCCACTCCGCCG __^cc]^\`eegea`ffdfghhfd]eghhfffef``degfhf_^gdfhfg_fghhhfdhffdfhffbeWcW AS:i:142 XN:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:71 YS:i:44 YT:Z:CP") # NM:i:0
17
- end
18
-
19
- should "be a paired read" do
20
- assert @l1.read_paired?
21
- end
22
-
23
- should "be on the reverse strand" do
24
- assert @l1.read_reverse_strand?
25
- end
26
-
27
- should "be the first pair" do
28
- assert @l1.first_in_pair?
29
- end
30
-
31
- should "get the mappping position" do
32
- assert @l1.pos == 1572276
33
- end
34
-
35
- should "get the end position" do
36
- assert @l1.endpos == 1572376
37
- end
38
-
39
- should "get the end position too" do
40
- assert @l2.endpos == 1789479, "this is #{@l1.endpos} but should be 1789479"
41
- end
42
-
43
- should "be exact match" do
44
- assert @l1.exact_match?
45
- end
46
-
47
- should "contain snp" do
48
- assert @l1.contains_snp?(1572283)
49
- end
50
-
51
- should "give A" do
52
- assert @l1.get_base_at(0)=="A", "this is #{@l1.get_base_at(0)}, but should be A"
53
- end
54
-
55
- should "parse the cigar string correctly" do
56
- @l3.parse_cigar
57
- assert @l3.cigar_list.size == 5
58
- end
59
-
60
- should "parse another cigar string correctly" do
61
- @l4l.parse_cigar
62
- assert @l4l.cigar_list.size == 7
63
- end
64
-
65
- should "mark a snp" do
66
- assert @l1.mark_snp(1572283)==7, "this is #{@l1.mark_snp(1572283)}, but i think it should be 7"
67
- end
68
-
69
- should "mark yet another snp" do
70
- assert @l4l.mark_snp(1572283)==17, "this is #{@l4l.mark_snp(1572283)}, but i think it should be 17"
71
- end
72
-
73
- should "transfer a snp from one object to another" do
74
- @l1.mark_snp(1572283)
75
- @l2.transfer_snp(@l1)
76
- assert @l2.snp == 7, "this is #{@l2.snp}, but i think it should be 7"
77
- end
78
-
79
- should "mark another snp" do
80
- assert @l2.mark_snp(1789386)==2, "this is #{@l2.mark_snp(1789386)}, but i think it should be 3"
81
- end
82
-
83
- should "mark a third snp" do
84
- assert @l3.mark_snp(1789386)==9, "this is #{@l3.mark_snp(1789386)}, but i think it should be 9"
85
- end
86
-
87
- should "find the location of a snp on the genome" do
88
- @l4l.mark_snp(1572283)
89
- @l4r.transfer_snp(@l4l)
90
- assert @l4r.put_snp==1789386, "this is #{@l4r.put_snp}, but I think it should be 1789386"
91
- end
92
-
93
- should "not be primary alignment" do
94
- assert !@l5l.primary_aln?
95
- end
96
-
97
- should "get the edit distance" do
98
- assert_equal 8, @l2.tags[:NM]
99
- end
100
-
101
- should "fail to get the edit distance" do
102
- assert_equal nil, @l5l.tags[:NM]
103
- end
104
-
105
- end
106
- end