htslib 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -2
- data/TUTORIAL.md +45 -0
- data/lib/hts/bam/auxi.rb +118 -0
- data/lib/hts/bam/base_mod.rb +343 -0
- data/lib/hts/bam/header.rb +17 -0
- data/lib/hts/bam/mpileup.rb +175 -0
- data/lib/hts/bam/pileup.rb +201 -0
- data/lib/hts/bam/record.rb +13 -1
- data/lib/hts/bam.rb +97 -11
- data/lib/hts/bcf/info.rb +158 -0
- data/lib/hts/bcf.rb +39 -6
- data/lib/hts/faidx/sequence.rb +1 -1
- data/lib/hts/faidx.rb +82 -28
- data/lib/hts/hts.rb +1 -1
- data/lib/hts/libhts/constants.rb +38 -3
- data/lib/hts/libhts/sam.rb +34 -23
- data/lib/hts/libhts.rb +6 -0
- data/lib/hts/tabix.rb +43 -2
- data/lib/hts/version.rb +1 -1
- metadata +6 -3
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level mpileup iterator over multiple BAM/CRAM inputs
|
|
6
|
+
class Mpileup
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# Usage:
#   HTS::Bam::Mpileup.open([bam1, bam2], region: "chr1:1-100") do |mpl|
#     mpl.each { |cols| ... }
#   end
#
# All arguments are forwarded to .new. With a block, the new iterator is
# yielded and then closed when the block exits, whether normally or through
# an exception. The iterator is the return value in both forms.
def self.open(*args, **kw)
  mpileup = new(*args, **kw)
  if block_given?
    begin
      yield mpileup
    ensure
      mpileup.close
    end
  end
  mpileup
end
|
|
24
|
+
|
|
25
|
+
# Build an mpileup iterator over one or more alignment files.
#
# Each input is normalized to an HTS::Bam: Bam instances are used as given,
# Strings are opened with HTS::Bam.open and are owned by this object
# (closed again by #close). If construction fails part-way, everything
# acquired so far is released through #close before the error is re-raised.
#
# @param inputs [Array<HTS::Bam, String>] non-empty list of inputs
# @param region [String, nil] region string for sam_itr_querys when beg/end_
#   are nil, or a reference name resolved with header.get_tid when both are given
# @param beg [Integer, nil] start coordinate handed to sam_itr_queryi
# @param end_ [Integer, nil] end coordinate handed to sam_itr_queryi
# @param maxcnt [Integer, nil] passed to bam_mplp_set_maxcnt when given
# @param overlaps [Boolean] call bam_mplp_init_overlaps when true
# @raise [ArgumentError] empty inputs, unsupported input type, or a beg/end_
#   combination that matches neither region form
# @raise [RuntimeError] missing index, failed region query, or failed HTSlib init
def initialize(inputs, region: nil, beg: nil, end_: nil, maxcnt: nil, overlaps: false)
  raise ArgumentError, "inputs must be non-empty" if inputs.nil? || inputs.empty?

  # Every ivar that #close reads is set before anything can fail, so #close
  # is safe to call on a partially constructed object.
  @owned_bams = [] # Bams we opened here; will be closed on close
  @bams = []
  @iters = []
  @data_blocks = [] # per-input packed pointers kept alive
  @iter = nil

  begin
    @bams = inputs.map do |x|
      case x
      when HTS::Bam
        x
      when String
        b = HTS::Bam.open(x)
        @owned_bams << b
        b
      else
        raise ArgumentError, "Unsupported input type: #{x.class}"
      end
    end

    n = @bams.length

    # Prepare optional region iterators for each input
    @bams.each_with_index do |bam, i|
      itr = nil
      if region && beg.nil? && end_.nil?
        raise "Index required for region mpileup" unless bam.index_loaded?

        itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), bam.header.struct, region)
        raise "Failed to query region on input ##{i}: #{region}" if itr.null?
      elsif region && beg && end_
        raise "Index required for region mpileup" unless bam.index_loaded?

        tid = bam.header.get_tid(region)
        itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
        raise "Failed to query region on input ##{i}: #{region} #{beg} #{end_}" if itr.null?
      elsif beg || end_
        raise ArgumentError, "beg and end_ must be specified together"
      end
      @iters << itr
    end

    # Build per-input packed pointer blocks so C passes them back to the callback.
    # Layout per input: [0] hts_fp (htsFile*), [1] hdr_struct (bam_hdr_t*), [2] itr (hts_itr_t* or NULL)
    ptr_size = FFI.type_size(:pointer)
    data_array = FFI::MemoryPointer.new(:pointer, n)
    @bams.each_with_index do |bam, i|
      hts_fp = bam.instance_variable_get(:@hts_file)
      hdr_struct = bam.header.struct
      itr = @iters[i]
      block = FFI::MemoryPointer.new(:pointer, 3)
      block.put_pointer(0 * ptr_size, hts_fp)
      block.put_pointer(1 * ptr_size, hdr_struct)
      block.put_pointer(2 * ptr_size, itr && !itr.null? ? itr : FFI::Pointer::NULL)
      @data_blocks << block
      data_array.put_pointer(i * ptr_size, block)
    end
    # Keep the array of per-input blocks alive while the C side holds on to them
    @data_array = data_array

    @cb = FFI::Function.new(:int, %i[pointer pointer]) do |data, b|
      # Unpack pointers from the per-input block
      hts_fp = data.get_pointer(0 * ptr_size)
      hdr_struct = data.get_pointer(1 * ptr_size)
      itr = data.get_pointer(2 * ptr_size)
      # HTSlib contract: return same as sam_itr_next/sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
      if itr && !itr.null?
        HTS::LibHTS.sam_itr_next(hts_fp, itr, b)
      else
        HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
      end
    end

    @iter = HTS::LibHTS.bam_mplp_init(n, @cb, @data_array)
    raise "bam_mplp_init failed" if @iter.null?

    HTS::LibHTS.bam_mplp_set_maxcnt(@iter, maxcnt) if maxcnt

    if overlaps
      rc = HTS::LibHTS.bam_mplp_init_overlaps(@iter)
      raise "bam_mplp_init_overlaps failed" if rc < 0
    end
  rescue StandardError
    # Release owned files, region iterators and the mpileup handle acquired
    # before the failure; the caller never receives an object to close.
    close
    raise
  end
end
|
|
108
|
+
|
|
109
|
+
# Yields an array of Pileup::PileupColumn (one per input, in input order)
# for each position reported by bam_mplp64_auto. An input with no entries
# at a position gets a column whose alignments array is empty.
#
# @yieldparam cols [Array<HTS::Bam::Pileup::PileupColumn>]
# @return [self, Enumerator] an Enumerator when no block is given
# @raise [IOError] if the iterator is absent or NULL (e.g. after #close)
# @raise [RuntimeError] if bam_mplp64_auto returns a negative value
def each
  return to_enum(__method__) unless block_given?

  # Handing a NULL iterator to HTSlib would crash the process.
  raise IOError, "Mpileup iterator is closed" if @iter.nil? || @iter.null?

  n = @bams.length
  tid_ptr = FFI::MemoryPointer.new(:int)
  pos_ptr = FFI::MemoryPointer.new(:long_long)
  n_ptr = FFI::MemoryPointer.new(:int, n)
  plp_ptr = FFI::MemoryPointer.new(:pointer, n)
  plp1_size = HTS::LibHTS::BamPileup1.size
  headers = @bams.map(&:header)

  rc = 0
  while (rc = HTS::LibHTS.bam_mplp64_auto(@iter, tid_ptr, pos_ptr, n_ptr, plp_ptr)).positive?
    tid = tid_ptr.read_int
    pos = pos_ptr.read_long_long

    counts = n_ptr.read_array_of_int(n)
    plp_arr = plp_ptr.read_array_of_pointer(n)

    cols = Array.new(n) do |i|
      c = counts[i]
      base_ptr = plp_arr[i]
      aligns =
        if c <= 0 || base_ptr.null?
          []
        else
          # Entries for one input are laid out contiguously, plp1_size bytes apart.
          Array.new(c) do |j|
            entry = HTS::LibHTS::BamPileup1.new(base_ptr + (j * plp1_size))
            HTS::Bam::Pileup::PileupRecord.new(entry, headers[i])
          end
        end
      HTS::Bam::Pileup::PileupColumn.new(tid: tid, pos: pos, alignments: aligns)
    end

    yield cols
  end

  # 0 means no more positions; a negative value is an error, not end of data.
  raise "HTSlib mpileup error (bam_mplp64_auto)" if rc.negative?

  self
end
|
|
154
|
+
|
|
155
|
+
# Release native resources: destroy the mpileup handle and every per-input
# region iterator, then close the Bam files this object opened itself.
# A second call finds the handle NULL and both lists empty.
def close
  unless @iter.nil? || @iter.null?
    HTS::LibHTS.bam_mplp_destroy(@iter)
    @iter = FFI::Pointer::NULL
  end

  @iters.each do |region_itr|
    next if region_itr.nil? || region_itr.null?

    HTS::LibHTS.hts_itr_destroy(region_itr)
  end
  @iters.clear

  # Keep references to callback and data blocks to prevent GC
  @_keepalive = [@cb, @data_array, *@data_blocks]

  # Best effort: a failure closing one owned file must not stop the others.
  @owned_bams.each do |owned|
    begin
      owned.close
    rescue StandardError
      nil
    end
  end
  @owned_bams.clear
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level pileup iterator for a single SAM/BAM/CRAM
|
|
6
|
+
class Pileup
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# Usage:
#   HTS::Bam::Pileup.open(bam, region: "chr1:1-100") do |pl|
#     pl.each { |col| ... }
#   end
#
# Arguments go straight to .new. When a block is supplied the iterator is
# yielded and closed once the block exits (normally or by raising).
# The iterator itself is returned either way.
def self.open(*args, **kw)
  pileup = new(*args, **kw)
  return pileup unless block_given?

  begin
    yield pileup
    pileup
  ensure
    pileup.close
  end
end
|
|
24
|
+
|
|
25
|
+
# One reference position (tid, pos) together with the pileup entries
# (alignments) covering it. Built with keyword arguments.
PileupColumn = Struct.new(:tid, :pos, :alignments, keyword_init: true) do
  # Number of entries in alignments.
  def depth = alignments.length
end
|
|
31
|
+
|
|
32
|
+
# A wrapper of one bam_pileup1_t entry, paired with the header that is
# handed to Bam::Record when the alignment itself is requested.
class PileupRecord
  # @param entry [HTS::LibHTS::BamPileup1] struct view of the pileup entry
  # @param header [Object] header passed on to HTS::Bam::Record.new
  def initialize(entry, header)
    @entry = entry
    @header = header
    @record = nil
  end

  # The alignment as a Bam::Record. The first call copies the entry's bam1_t
  # with bam_dup1 and wraps the copy; later calls return that same Record.
  # NOTE: Without duplication, bam1_t memory may be reused by HTSlib on the
  # next step, so the copy is what makes the Record safe to keep.
  #
  # @raise [RuntimeError] if bam_dup1 returns NULL
  def record
    @record ||= begin
      # The :b field may come back as a raw pointer or as a struct wrapper.
      raw = @entry[:b]
      raw = raw.to_ptr unless raw.is_a?(FFI::Pointer)

      owned = HTS::LibHTS.bam_dup1(raw)
      raise "bam_dup1 failed" if owned.null?

      HTS::Bam::Record.new(@header, owned)
    end
  end

  # Value of the entry's qpos field.
  def query_position
    @entry[:qpos]
  end

  # Value of the entry's indel field.
  def indel
    @entry[:indel]
  end

  def del?
    flag?(:is_del)
  end

  def head?
    flag?(:is_head)
  end

  def tail?
    flag?(:is_tail)
  end

  def refskip?
    flag?(:is_refskip)
  end

  private

  # True when the named entry field equals 1.
  def flag?(field)
    @entry[field] == 1
  end
end
|
|
80
|
+
|
|
81
|
+
# Create a Pileup iterator
#
# If any step fails after the region iterator or pileup handle has been
# created, they are released through #close before the error is re-raised.
#
# @param bam [HTS::Bam]
# @param region [String, nil] Optional region string (requires index); used as
#   a reference name for header.get_tid when beg and end_ are both given
# @param beg [Integer, nil] Optional begin when using tid/beg/end form
# @param end_ [Integer, nil] Optional end when using tid/beg/end form
# @param maxcnt [Integer, nil] Max per-position depth (capped)
# @raise [ArgumentError] when beg/end_ match neither region form
# @raise [RuntimeError] missing index, failed region query, or bam_plp_init failure
def initialize(bam, region: nil, beg: nil, end_: nil, maxcnt: nil)
  @bam = bam
  @header = bam.header
  @itr = nil
  @cb = nil
  @plp = nil

  # Optional region iterator
  if region && beg.nil? && end_.nil?
    raise "Index file is required to use region pileup." unless bam.index_loaded?

    @itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), @header.struct, region)
    raise "Failed to query region: #{region}" if @itr.null?
  elsif region && beg && end_
    raise "Index file is required to use region pileup." unless bam.index_loaded?

    tid = @header.get_tid(region)
    @itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
    raise "Failed to query region: #{region} #{beg} #{end_}" if @itr.null?
  elsif beg || end_
    raise ArgumentError, "beg and end_ must be specified together"
  end

  # Build the auto callback for bam_plp_init (micro-optimized)
  # - Hoist ivar/constant lookups out of the callback to reduce per-call overhead.
  # - Specialize callbacks to avoid branching in the hot path.
  hts_fp = @bam.instance_variable_get(:@hts_file)
  hdr_struct = @header.struct
  itr_local = @itr

  @cb = if itr_local && !itr_local.null?
          FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
            # HTSlib contract: return same as sam_itr_next (>= 0 on success, -1 on EOF, < -1 on error)
            HTS::LibHTS.sam_itr_next(hts_fp, itr_local, b)
          end
        else
          FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
            # HTSlib contract: return same as sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
            HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
          end
        end

  @plp = HTS::LibHTS.bam_plp_init(@cb, nil)
  raise "bam_plp_init failed" if @plp.null?

  HTS::LibHTS.bam_plp_set_maxcnt(@plp, maxcnt) if maxcnt
rescue StandardError
  # The caller never receives this object on failure, so free whatever was
  # acquired; #close skips handles that are still nil or NULL.
  close
  raise
end
|
|
134
|
+
|
|
135
|
+
# Yield one PileupColumn per position returned by bam_plp64_auto.
#
# @yieldparam column [PileupColumn]
# @return [self, Enumerator] an Enumerator when no block is given
# @raise [IOError] if the pileup handle is absent or NULL (e.g. after #close)
# @raise [RuntimeError] if bam_plp64_auto returns NULL with a negative count
def each
  return to_enum(__method__) unless block_given?

  # Handing a NULL handle to HTSlib would crash the process.
  raise IOError, "Pileup iterator is closed" if @plp.nil? || @plp.null?

  tid_ptr = FFI::MemoryPointer.new(:int)
  pos_ptr = FFI::MemoryPointer.new(:long_long) # hts_pos_t
  n_ptr = FFI::MemoryPointer.new(:int)

  # Micro-optimizations:
  # - Compute constant struct size once
  # - Hoist header reference outside the loop
  plp1_size = HTS::LibHTS::BamPileup1.size
  header_local = @header

  loop do
    base_ptr = HTS::LibHTS.bam_plp64_auto(@plp, tid_ptr, pos_ptr, n_ptr)

    # When base_ptr is NULL, check n to distinguish EOF (n == 0) from error (n < 0)
    if base_ptr.null?
      raise "HTSlib pileup error (bam_plp64_auto)" if n_ptr.read_int < 0

      break
    end

    tid = tid_ptr.read_int
    pos = pos_ptr.read_long_long
    n = n_ptr.read_int

    # Entries are laid out contiguously, plp1_size bytes apart.
    alignments = Array.new(n) do |i|
      entry = HTS::LibHTS::BamPileup1.new(base_ptr + (i * plp1_size))
      PileupRecord.new(entry, header_local)
    end

    yield PileupColumn.new(tid: tid, pos: pos, alignments: alignments)
  end

  self
end
|
|
182
|
+
|
|
183
|
+
# Call bam_plp_reset on the pileup handle. Does nothing (returns nil) when
# the handle is absent or NULL, e.g. after #close.
def reset
  return if @plp.nil? || @plp.null?

  HTS::LibHTS.bam_plp_reset(@plp)
end
|
|
186
|
+
|
|
187
|
+
# Destroy the pileup handle and the region iterator, replacing each with
# FFI::Pointer::NULL. Handles that are already nil or NULL are skipped, so
# repeated calls do nothing further. Returns the callback held in @cb.
def close
  unless @plp.nil? || @plp.null?
    HTS::LibHTS.bam_plp_destroy(@plp)
    @plp = FFI::Pointer::NULL
  end

  unless @itr.nil? || @itr.null?
    HTS::LibHTS.hts_itr_destroy(@itr)
    @itr = FFI::Pointer::NULL
  end

  # @cb is left assigned on the instance and doubles as the return value.
  @cb
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
end
|
data/lib/hts/bam/record.rb
CHANGED
|
@@ -326,6 +326,13 @@ module HTS
|
|
|
326
326
|
end
|
|
327
327
|
end
|
|
328
328
|
|
|
329
|
+
# Get base modification information from MM/ML tags
# @param auto_parse [Boolean] forwarded unchanged to BaseMod.new (default: true)
# @return [BaseMod] a new BaseMod built around this record
def base_mod(auto_parse: true) = BaseMod.new(self, auto_parse: auto_parse)
|
|
335
|
+
|
|
329
336
|
# TODO: add a method to get the auxiliary fields as a hash.
|
|
330
337
|
|
|
331
338
|
# TODO: add a method to set the auxiliary fields.
|
|
@@ -352,8 +359,13 @@ module HTS
|
|
|
352
359
|
private
|
|
353
360
|
|
|
354
361
|
def initialize_copy(orig)
  super
  @header = orig.header
  # Give the copy its own bam1_t (bam_dup1) so it is detached from the
  # original's buffer; @bam1 is left untouched if duplication fails.
  @bam1 = LibHTS.bam_dup1(orig.struct).tap do |copied|
    raise "bam_dup1 failed" if copied.null?
  end
end
|
|
358
370
|
end
|
|
359
371
|
end
|
data/lib/hts/bam.rb
CHANGED
|
@@ -7,7 +7,9 @@ require_relative "bam/header"
|
|
|
7
7
|
require_relative "bam/cigar"
|
|
8
8
|
require_relative "bam/flag"
|
|
9
9
|
require_relative "bam/record"
|
|
10
|
-
|
|
10
|
+
require_relative "bam/base_mod"
|
|
11
|
+
require_relative "bam/pileup"
|
|
12
|
+
require_relative "bam/mpileup"
|
|
11
13
|
# require_relative "bam/pileup_entry"
|
|
12
14
|
|
|
13
15
|
module HTS
|
|
@@ -160,7 +162,7 @@ module HTS
|
|
|
160
162
|
|
|
161
163
|
position = tell
|
|
162
164
|
ary = map { |r| r.aux(tag) }
|
|
163
|
-
seek(position)
|
|
165
|
+
seek(position) if position
|
|
164
166
|
ary
|
|
165
167
|
end
|
|
166
168
|
|
|
@@ -194,6 +196,13 @@ module HTS
|
|
|
194
196
|
self
|
|
195
197
|
end
|
|
196
198
|
|
|
199
|
+
# Iterate alignment records in this file.
|
|
200
|
+
#
|
|
201
|
+
# Performance and memory semantics:
|
|
202
|
+
# - copy: false (default) reuses a single Record instance and its underlying bam1_t buffer.
|
|
203
|
+
# The yielded Record MUST NOT be stored beyond the block; its content will be overwritten
|
|
204
|
+
# by the next iteration. If you need to retain it, call `rec = rec.dup`.
|
|
205
|
+
# - copy: true yields a fresh Record per iteration (deep-copied via bam_dup1). Slower, safe to keep.
|
|
197
206
|
def each(copy: false, &block)
|
|
198
207
|
if copy
|
|
199
208
|
each_record_copy(&block)
|
|
@@ -202,23 +211,65 @@ module HTS
|
|
|
202
211
|
end
|
|
203
212
|
end
|
|
204
213
|
|
|
214
|
+
# Iterate records in a genomic region or multiple regions.
|
|
215
|
+
# See {#each} for copy semantics. When copy: false, the yielded Record is reused and should not be stored.
|
|
216
|
+
#
|
|
217
|
+
# @param region [String, Array<String>] Region specification(s)
|
|
218
|
+
# - Single region: "chr1:100-200" or "chr1" with beg/end parameters
|
|
219
|
+
# - Multiple regions: ["chr1:100-200", "chr2:500-600", ...]
|
|
220
|
+
# @param beg [Integer, nil] Start position (used with single string region)
|
|
221
|
+
# @param end_ [Integer, nil] End position (used with single string region)
|
|
222
|
+
# @param copy [Boolean] Whether to deep-copy records (see {#each})
|
|
223
|
+
#
|
|
224
|
+
# @example Single region query
|
|
225
|
+
# bam.query("chr1:100-200") { |r| puts r.qname }
|
|
226
|
+
# bam.query("chr1", 100, 200) { |r| puts r.qname }
|
|
227
|
+
#
|
|
228
|
+
# @example Multi-region query
|
|
229
|
+
# bam.query(["chr1:100-200", "chr2:500-600"]) { |r| puts r.qname }
|
|
205
230
|
def query(region, beg = nil, end_ = nil, copy: false, &block)
  check_closed
  raise "Index file is required to call the query method." unless index_loaded?

  # Array form: several region strings, no separate coordinates allowed.
  if region.is_a?(Array)
    raise ArgumentError, "beg and end_ cannot be used with array of regions" if beg || end_

    return query_regions(region, copy: copy, &block)
  end

  raise ArgumentError, "region must be String or Array" unless region.is_a?(String)

  # String form without coordinates: the string is a full region spec.
  return querys(region, copy: copy, &block) if beg.nil? && end_.nil?

  raise ArgumentError, "beg and end_ must be specified together" unless beg && end_

  # String form with coordinates: the string is a reference name.
  queryi(header.get_tid(region), beg, end_, copy: copy, &block)
end
|
|
218
252
|
|
|
219
|
-
#
|
|
220
|
-
#
|
|
221
|
-
#
|
|
253
|
+
# Pileup iterator over this file. Optional region can be specified.
#
# With a block, each column is yielded and the underlying Pileup is closed
# when iteration ends (Pileup.open closes it on normal exit, error or early
# exit); returns self.
# Without a block, returns an Enumerator. The Pileup is created when the
# Enumerator is iterated and closed when that iteration ends, so there is no
# handle left for the caller to close. Errors from building the Pileup
# (missing index, bad region) therefore surface on iteration.
#
# @param region [String, nil] region string like "chr1:100-200"
# @param beg [Integer, nil]
# @param end_ [Integer, nil]
# @param maxcnt [Integer, nil] cap on depth per position
# @return [self, Enumerator]
def pileup(region = nil, beg = nil, end_: nil, maxcnt: nil, &block)
  check_closed
  # Re-enter this method with a block on iteration, so every enumeration
  # gets its own Pileup that is reliably closed afterwards.
  return to_enum(__method__, region, beg, end_: end_, maxcnt: maxcnt) unless block_given?

  Pileup.open(self, region: region, beg: beg, end_: end_, maxcnt: maxcnt) do |piter|
    piter.each(&block)
  end
  self
end
|
|
222
273
|
|
|
223
274
|
private
|
|
224
275
|
|
|
@@ -238,6 +289,17 @@ module HTS
|
|
|
238
289
|
end
|
|
239
290
|
end
|
|
240
291
|
|
|
292
|
+
# Multi-region query implementation: hands off to the copying or the
# record-reusing variant depending on +copy+.
def query_regions(regions, copy: false, &block)
  return query_regions_copy(regions, &block) if copy

  query_regions_reuse(regions, &block)
end
|
|
300
|
+
|
|
301
|
+
# Internal: yield a single reused Record over the entire file.
|
|
302
|
+
# The underlying bam1_t is mutated on each iteration for speed.
|
|
241
303
|
def each_record_reuse
|
|
242
304
|
check_closed
|
|
243
305
|
# Each does not always start at the beginning of the file.
|
|
@@ -250,6 +312,7 @@ module HTS
|
|
|
250
312
|
self
|
|
251
313
|
end
|
|
252
314
|
|
|
315
|
+
# Internal: yield deep-copied Records so callers may retain them safely.
|
|
253
316
|
def each_record_copy
|
|
254
317
|
check_closed
|
|
255
318
|
return to_enum(__method__) unless block_given?
|
|
@@ -301,6 +364,7 @@ module HTS
|
|
|
301
364
|
self
|
|
302
365
|
end
|
|
303
366
|
|
|
367
|
+
# Internal: reused-Record iterator over a query iterator.
|
|
304
368
|
def query_reuse_yield(qiter)
|
|
305
369
|
bam1 = LibHTS.bam_init1
|
|
306
370
|
record = Record.new(header, bam1)
|
|
@@ -323,5 +387,27 @@ module HTS
|
|
|
323
387
|
ensure
|
|
324
388
|
LibHTS.hts_itr_destroy(qiter)
|
|
325
389
|
end
|
|
390
|
+
|
|
391
|
+
# Multi-region query using sequential single-region queries: querys_reuse is
# run once per region, in the order given, with the caller's block.
# Note: This is a fallback implementation. Ideally we would use sam_itr_regarray
# but there seem to be issues with the multi-region iterator in the current setup.
# Returns self, or an Enumerator when called without a block.
def query_regions_reuse(regions, &block)
  if block
    regions.each { |one_region| querys_reuse(one_region, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
402
|
+
|
|
403
|
+
# Multi-region query with copied Records using sequential queries:
# querys_copy is run once per region, in the order given, with the caller's
# block. Returns self, or an Enumerator when called without a block.
def query_regions_copy(regions, &block)
  if block
    regions.each { |one_region| querys_copy(one_region, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
326
412
|
end
|
|
327
413
|
end
|