htslib 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/hts/bam/base_mod.rb +343 -0
- data/lib/hts/bam/header.rb +17 -0
- data/lib/hts/bam/mpileup.rb +175 -0
- data/lib/hts/bam/pileup.rb +201 -0
- data/lib/hts/bam/record.rb +13 -1
- data/lib/hts/bam.rb +114 -11
- data/lib/hts/bcf.rb +42 -9
- data/lib/hts/faidx.rb +1 -1
- data/lib/hts/hts.rb +1 -1
- data/lib/hts/libhts/constants.rb +39 -6
- data/lib/hts/libhts/sam.rb +34 -23
- data/lib/hts/libhts/sam_funcs.rb +1 -1
- data/lib/hts/libhts/vcf_funcs.rb +1 -1
- data/lib/hts/libhts.rb +6 -0
- data/lib/hts/tabix.rb +22 -3
- data/lib/hts/version.rb +1 -1
- metadata +6 -7
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level pileup iterator for a single SAM/BAM/CRAM file.
#
# Wraps an HTSlib pileup handle created with bam_plp_init and yields one
# PileupColumn per reference position from #each. The native handles are
# released by #close; Pileup.open with a block does that automatically.
class Pileup
  include Enumerable

  # Build a Pileup and, when a block is given, yield it and close it
  # afterwards (also when the block raises).
  #
  # Usage:
  #   HTS::Bam::Pileup.open(bam, region: "chr1:1-100") do |pl|
  #     pl.each { |col| ... }
  #   end
  #
  # All arguments are forwarded unchanged to Pileup.new.
  # @return [Pileup] the instance; it is already closed when a block was given
  def self.open(*args, **kw)
    pu = new(*args, **kw)
    return pu unless block_given?

    begin
      yield pu
    ensure
      pu.close
    end
    pu
  end

  # A column at a reference position with its pileup alignments.
  PileupColumn = Struct.new(:tid, :pos, :alignments, keyword_init: true) do
    # @return [Integer] number of alignments in this column
    def depth
      alignments.length
    end
  end

  # A wrapper of one bam_pileup1_t entry.
  class PileupRecord
    # @param entry [#[]] pileup entry answering :b, :qpos, :indel and the is_* flags
    # @param header [Object] header handed to Bam::Record when #record is called
    def initialize(entry, header)
      @entry = entry
      @header = header
      @record = nil
    end

    # Return Bam::Record. On the first call, duplicate the underlying bam1_t (bam_dup1)
    # so the record becomes safe to keep beyond the current pileup step. Subsequent calls
    # return the cached Bam::Record instance.
    # NOTE: Without duplication, bam1_t memory may be reused by HTSlib on the next step.
    #
    # @raise [RuntimeError] when bam_dup1 returns a NULL pointer
    def record
      return @record if @record

      # Normalize to a raw pointer and duplicate to obtain owned memory.
      b_ptr = @entry[:b].is_a?(FFI::Pointer) ? @entry[:b] : @entry[:b].to_ptr
      dup_ptr = HTS::LibHTS.bam_dup1(b_ptr)
      raise "bam_dup1 failed" if dup_ptr.null?

      # Build a Bam::Record backed by the duplicated bam1_t.
      @record = HTS::Bam::Record.new(@header, dup_ptr)
    end

    # @return [Integer] the entry's :qpos field
    def query_position
      @entry[:qpos]
    end

    # @return [Integer] the entry's :indel field
    def indel
      @entry[:indel]
    end

    # @return [Boolean] true when the entry's is_del bit is 1
    def del?
      @entry[:is_del] == 1
    end

    # @return [Boolean] true when the entry's is_head bit is 1
    def head?
      @entry[:is_head] == 1
    end

    # @return [Boolean] true when the entry's is_tail bit is 1
    def tail?
      @entry[:is_tail] == 1
    end

    # @return [Boolean] true when the entry's is_refskip bit is 1
    def refskip?
      @entry[:is_refskip] == 1
    end
  end

  # Create a Pileup iterator
  # @param bam [HTS::Bam]
  # @param region [String, nil] Optional region string (requires index)
  # @param beg [Integer, nil] Optional begin when using tid/beg/end form
  # @param end_ [Integer, nil] Optional end when using tid/beg/end form
  # @param maxcnt [Integer, nil] Max per-position depth (capped)
  # @raise [ArgumentError] when beg/end_ are not given as a pair, or are given without region
  # @raise [RuntimeError] when the index is missing, the region query fails, or bam_plp_init fails
  def initialize(bam, region: nil, beg: nil, end_: nil, maxcnt: nil)
    @bam = bam
    @header = bam.header
    @itr = nil
    @cb = nil
    @plp = nil

    @itr = build_region_iterator(region, beg, end_)
    @cb = build_read_callback

    @plp = HTS::LibHTS.bam_plp_init(@cb, nil)
    if @plp.null?
      # Do not leak the region iterator when the pileup handle cannot be created.
      release_iterator
      raise "bam_plp_init failed"
    end

    HTS::LibHTS.bam_plp_set_maxcnt(@plp, maxcnt) if maxcnt
  end

  # @return [Boolean] true once #close has released the pileup handle
  def closed?
    @plp.nil? || @plp.null?
  end

  # Yield a PileupColumn for every position returned by bam_plp64_auto.
  #
  # The PileupRecord objects inside a column wrap memory owned by HTSlib; call
  # PileupRecord#record to obtain a copy that outlives the current step.
  #
  # @return [Enumerator] when no block is given
  # @return [self] after the pileup reported end of data
  # @raise [IOError] when called with a block after #close
  # @raise [RuntimeError] when HTSlib reports a pileup error
  def each
    return to_enum(__method__) unless block_given?
    # A closed Pileup holds a NULL handle; it must never reach the native call.
    raise IOError, "closed Pileup" if closed?

    tid_ptr = FFI::MemoryPointer.new(:int)
    pos_ptr = FFI::MemoryPointer.new(:long_long) # hts_pos_t
    n_ptr = FFI::MemoryPointer.new(:int)

    # Loop invariants: entry size and header reference.
    plp1_size = HTS::LibHTS::BamPileup1.size
    header_local = @header

    loop do
      base_ptr = HTS::LibHTS.bam_plp64_auto(@plp, tid_ptr, pos_ptr, n_ptr)
      n = n_ptr.read_int

      # When base_ptr is NULL, check n to distinguish EOF (n == 0) from error (n < 0)
      if base_ptr.null?
        raise "HTSlib pileup error (bam_plp64_auto)" if n < 0

        break
      end

      # One PileupRecord per bam_pileup1_t entry of the returned array.
      alignments = Array.new(n) do |i|
        entry = HTS::LibHTS::BamPileup1.new(base_ptr + (i * plp1_size))
        PileupRecord.new(entry, header_local)
      end

      yield PileupColumn.new(tid: tid_ptr.read_int, pos: pos_ptr.read_long_long, alignments: alignments)
    end

    self
  end

  # Call bam_plp_reset on the pileup handle; a no-op once closed.
  def reset
    HTS::LibHTS.bam_plp_reset(@plp) if @plp && !@plp.null?
  end

  # Release the pileup handle and the region iterator. Safe to call repeatedly.
  #
  # @return [FFI::Function, nil] the read callback, which stays referenced by
  #   this instance (same return value as before)
  def close
    if @plp && !@plp.null?
      HTS::LibHTS.bam_plp_destroy(@plp)
      @plp = FFI::Pointer::NULL
    end
    release_iterator
    # Keep @cb referenced by instance to avoid GC during iteration.
    @cb
  end

  private

  # Create the optional region iterator, or return nil for whole-file pileup.
  def build_region_iterator(region, beg, end_)
    if region && beg.nil? && end_.nil?
      raise "Index file is required to use region pileup." unless @bam.index_loaded?

      itr = HTS::LibHTS.sam_itr_querys(@bam.instance_variable_get(:@idx), @header.struct, region)
      raise "Failed to query region: #{region}" if itr.null?

      itr
    elsif region && beg && end_
      raise "Index file is required to use region pileup." unless @bam.index_loaded?

      tid = @header.get_tid(region)
      itr = HTS::LibHTS.sam_itr_queryi(@bam.instance_variable_get(:@idx), tid, beg, end_)
      raise "Failed to query region: #{region} #{beg} #{end_}" if itr.null?

      itr
    elsif beg && end_
      # Both coordinates are present but there is no reference name to resolve.
      raise ArgumentError, "region is required when beg and end_ are given"
    elsif beg || end_
      raise ArgumentError, "beg and end_ must be specified together"
    end
  end

  # Build the read callback handed to bam_plp_init. Lookups are hoisted into
  # locals so the callback body is a single native call.
  def build_read_callback
    hts_fp = @bam.instance_variable_get(:@hts_file)
    hdr_struct = @header.struct
    itr_local = @itr

    if itr_local && !itr_local.null?
      FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
        # HTSlib contract: return same as sam_itr_next (>= 0 on success, -1 on EOF, < -1 on error)
        HTS::LibHTS.sam_itr_next(hts_fp, itr_local, b)
      end
    else
      FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
        # HTSlib contract: return same as sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
        HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
      end
    end
  end

  # Destroy the region iterator, if any, and mark it as released.
  def release_iterator
    return unless @itr && !@itr.null?

    HTS::LibHTS.hts_itr_destroy(@itr)
    @itr = FFI::Pointer::NULL
  end
end
|
|
200
|
+
end
|
|
201
|
+
end
|
data/lib/hts/bam/record.rb
CHANGED
|
@@ -326,6 +326,13 @@ module HTS
|
|
|
326
326
|
end
|
|
327
327
|
end
|
|
328
328
|
|
|
329
|
+
# Build a BaseMod object for this record (base modification data from the MM/ML tags).
# @param auto_parse [Boolean] forwarded to BaseMod.new; per the original note,
#   true (the default) means parsing happens lazily on first access
# @return [BaseMod] Base modification object
def base_mod(auto_parse: true)
  options = { auto_parse: auto_parse }
  BaseMod.new(self, **options)
end
|
|
335
|
+
|
|
329
336
|
# TODO: add a method to get the auxiliary fields as a hash.
|
|
330
337
|
|
|
331
338
|
# TODO: add a method to set the auxiliary fields.
|
|
@@ -352,8 +359,13 @@ module HTS
|
|
|
352
359
|
private
|
|
353
360
|
|
|
354
361
|
# Copy constructor hook used by dup/clone: shares the header with the source
# record and gives the copy its own bam1_t obtained from bam_dup1.
# @raise [RuntimeError] when bam_dup1 returns a NULL pointer
def initialize_copy(orig)
  super
  @header = orig.header

  # Detach from the original buffer by duplicating the underlying bam1_t.
  copied = LibHTS.bam_dup1(orig.struct)
  raise "bam_dup1 failed" if copied.null?

  @bam1 = copied
end
|
|
358
370
|
end
|
|
359
371
|
end
|
data/lib/hts/bam.rb
CHANGED
|
@@ -7,6 +7,10 @@ require_relative "bam/header"
|
|
|
7
7
|
require_relative "bam/cigar"
|
|
8
8
|
require_relative "bam/flag"
|
|
9
9
|
require_relative "bam/record"
|
|
10
|
+
require_relative "bam/base_mod"
|
|
11
|
+
require_relative "bam/pileup"
|
|
12
|
+
require_relative "bam/mpileup"
|
|
13
|
+
# require_relative "bam/pileup_entry"
|
|
10
14
|
|
|
11
15
|
module HTS
|
|
12
16
|
# A class for working with SAM, BAM, CRAM files.
|
|
@@ -30,7 +34,7 @@ module HTS
|
|
|
30
34
|
def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
|
|
31
35
|
build_index: false)
|
|
32
36
|
if block_given?
|
|
33
|
-
message = "HTS::Bam.new()
|
|
37
|
+
message = "HTS::Bam.new() does not take block; Please use HTS::Bam.open() instead"
|
|
34
38
|
raise message
|
|
35
39
|
end
|
|
36
40
|
|
|
@@ -44,6 +48,17 @@ module HTS
|
|
|
44
48
|
|
|
45
49
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
|
46
50
|
|
|
51
|
+
# Auto-detect and set reference for CRAM files
|
|
52
|
+
if fai.nil? && @file_name.end_with?(".cram")
|
|
53
|
+
# Try to find reference file in the same directory
|
|
54
|
+
base_name = File.basename(@file_name, ".cram")
|
|
55
|
+
dir_name = File.dirname(@file_name)
|
|
56
|
+
potential_ref = File.join(dir_name, "#{base_name}.fa")
|
|
57
|
+
|
|
58
|
+
# For remote URLs, assume reference exists; for local files, check existence
|
|
59
|
+
fai = potential_ref if @file_name.start_with?("http") || File.exist?(potential_ref)
|
|
60
|
+
end
|
|
61
|
+
|
|
47
62
|
if fai
|
|
48
63
|
r = LibHTS.hts_set_fai_filename(@hts_file, fai)
|
|
49
64
|
raise "Failed to load fasta index: #{fai}" if r < 0
|
|
@@ -67,7 +82,7 @@ module HTS
|
|
|
67
82
|
else
|
|
68
83
|
warn "Create index for #{@file_name}"
|
|
69
84
|
end
|
|
70
|
-
case LibHTS.sam_index_build3(@file_name, index_name, min_shift,
|
|
85
|
+
case LibHTS.sam_index_build3(@file_name, index_name, min_shift, @nthreads || threads)
|
|
71
86
|
when 0 # successful
|
|
72
87
|
when -1 then raise "indexing failed"
|
|
73
88
|
when -2 then raise "opening #{@file_name} failed"
|
|
@@ -95,7 +110,7 @@ module HTS
|
|
|
95
110
|
end
|
|
96
111
|
|
|
97
112
|
# Destroy the loaded index handle (skipping nil and NULL handles), forget it,
# then let the superclass close the file.
def close
  if @idx
    LibHTS.hts_idx_destroy(@idx) unless @idx.null?
  end
  @idx = nil
  super
end
|
|
@@ -141,13 +156,13 @@ module HTS
|
|
|
141
156
|
alias isize insert_size
|
|
142
157
|
alias mpos mate_pos
|
|
143
158
|
|
|
144
|
-
#
|
|
159
|
+
# FIXME: experimental
# Collect the value of one auxiliary tag from the records this file iterates.
# The offset reported by tell is remembered first and restored with seek
# afterwards, provided tell returned a usable (non-nil, non-false) value.
# @param tag [String] auxiliary tag name handed to each record's aux
# @return [Array] one entry per record
def aux(tag)
  check_closed

  saved_offset = tell
  values = map { |record| record.aux(tag) }
  seek(saved_offset) if saved_offset
  values
end
|
|
153
168
|
|
|
@@ -181,6 +196,13 @@ module HTS
|
|
|
181
196
|
self
|
|
182
197
|
end
|
|
183
198
|
|
|
199
|
+
# Iterate alignment records in this file.
|
|
200
|
+
#
|
|
201
|
+
# Performance and memory semantics:
|
|
202
|
+
# - copy: false (default) reuses a single Record instance and its underlying bam1_t buffer.
|
|
203
|
+
# The yielded Record MUST NOT be stored beyond the block; its content will be overwritten
|
|
204
|
+
# by the next iteration. If you need to retain it, call `rec = rec.dup`.
|
|
205
|
+
# - copy: true yields a fresh Record per iteration (deep-copied via bam_dup1). Slower, safe to keep.
|
|
184
206
|
def each(copy: false, &block)
|
|
185
207
|
if copy
|
|
186
208
|
each_record_copy(&block)
|
|
@@ -189,17 +211,63 @@ module HTS
|
|
|
189
211
|
end
|
|
190
212
|
end
|
|
191
213
|
|
|
214
|
+
# Iterate the records overlapping one genomic region or a list of regions.
# Copy semantics are those of {#each}: with copy: false the yielded Record is
# reused between iterations and must not be stored.
#
# @param region [String, Array<String>] Region specification(s)
#   - String: "chr1:100-200", or a reference name such as "chr1" combined with beg/end_
#   - Array: ["chr1:100-200", "chr2:500-600", ...]
# @param beg [Integer, nil] Start position (only with a String region)
# @param end_ [Integer, nil] End position (only with a String region)
# @param copy [Boolean] Whether to deep-copy records (see {#each})
# @raise [ArgumentError] for an unsupported region type, beg/end_ combined with
#   an Array, or only one of beg/end_ given
# @raise [RuntimeError] when no index is loaded
#
# @example Single region query
#   bam.query("chr1:100-200") { |r| puts r.qname }
#   bam.query("chr1", 100, 200) { |r| puts r.qname }
#
# @example Multi-region query
#   bam.query(["chr1:100-200", "chr2:500-600"]) { |r| puts r.qname }
def query(region, beg = nil, end_ = nil, copy: false, &block)
  check_closed
  raise "Index file is required to call the query method." unless index_loaded?

  case region
  when String
    if beg.nil? && end_.nil?
      querys(region, copy: copy, &block)
    elsif beg && end_
      # Coordinate form: resolve the reference name to its numeric id first.
      queryi(header.get_tid(region), beg, end_, copy: copy, &block)
    else
      raise ArgumentError, "beg and end_ must be specified together"
    end
  when Array
    raise ArgumentError, "beg and end_ cannot be used with array of regions" if beg || end_

    query_regions(region, copy: copy, &block)
  else
    raise ArgumentError, "region must be String or Array"
  end
end
|
|
252
|
+
|
|
253
|
+
# Run a pileup over this file, optionally restricted to a region.
# With a block, every column is yielded and the Pileup is closed when the
# block-based iteration ends (it goes through Pileup.open); the Bam itself is returned.
# Without a block, an Enumerator over a freshly created Pileup is returned.
# NOTE(review): only the Enumerator is returned in that case, so it is unclear
# how a caller is expected to close the underlying Pileup - confirm.
#
# @param region [String, nil] region string like "chr1:100-200"
# @param beg [Integer, nil]
# @param end_ [Integer, nil]
# @param maxcnt [Integer, nil] cap on depth per position
# @return [self, Enumerator]
def pileup(region = nil, beg = nil, end_: nil, maxcnt: nil, &block)
  check_closed
  options = { region: region, beg: beg, end_: end_, maxcnt: maxcnt }
  return Pileup.new(self, **options).to_enum(:each) unless block_given?

  Pileup.open(self, **options) { |columns| columns.each(&block) }
  self
end
|
|
205
273
|
|
|
@@ -221,6 +289,17 @@ module HTS
|
|
|
221
289
|
end
|
|
222
290
|
end
|
|
223
291
|
|
|
292
|
+
# Multi-region query dispatcher: picks the copying or the record-reusing
# implementation according to +copy+ and forwards the block to it.
def query_regions(regions, copy: false, &block)
  return query_regions_copy(regions, &block) if copy

  query_regions_reuse(regions, &block)
end
|
|
300
|
+
|
|
301
|
+
# Internal: yield a single reused Record over the entire file.
|
|
302
|
+
# The underlying bam1_t is mutated on each iteration for speed.
|
|
224
303
|
def each_record_reuse
|
|
225
304
|
check_closed
|
|
226
305
|
# Each does not always start at the beginning of the file.
|
|
@@ -233,6 +312,7 @@ module HTS
|
|
|
233
312
|
self
|
|
234
313
|
end
|
|
235
314
|
|
|
315
|
+
# Internal: yield deep-copied Records so callers may retain them safely.
|
|
236
316
|
def each_record_copy
|
|
237
317
|
check_closed
|
|
238
318
|
return to_enum(__method__) unless block_given?
|
|
@@ -284,6 +364,7 @@ module HTS
|
|
|
284
364
|
self
|
|
285
365
|
end
|
|
286
366
|
|
|
367
|
+
# Internal: reused-Record iterator over a query iterator.
|
|
287
368
|
def query_reuse_yield(qiter)
|
|
288
369
|
bam1 = LibHTS.bam_init1
|
|
289
370
|
record = Record.new(header, bam1)
|
|
@@ -306,5 +387,27 @@ module HTS
|
|
|
306
387
|
ensure
|
|
307
388
|
LibHTS.hts_itr_destroy(qiter)
|
|
308
389
|
end
|
|
390
|
+
|
|
391
|
+
# Multi-region query built from one single-region query (querys_reuse) per
# entry, run in the order given.
# Note: This is a fallback implementation. Ideally sam_itr_regarray would be
# used, but the multi-region iterator appeared to have issues in the current setup.
# Returns an Enumerator when no block is given, otherwise self.
def query_regions_reuse(regions, &block)
  if block
    regions.each { |target| querys_reuse(target, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
402
|
+
|
|
403
|
+
# Multi-region query that delegates each region, in order, to querys_copy
# (the copying single-region query).
# Returns an Enumerator when no block is given, otherwise self.
def query_regions_copy(regions, &block)
  if block
    regions.each { |target| querys_copy(target, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
309
412
|
end
|
|
310
413
|
end
|
data/lib/hts/bcf.rb
CHANGED
|
@@ -30,7 +30,7 @@ module HTS
|
|
|
30
30
|
def initialize(file_name, mode = "r", index: nil, threads: nil,
|
|
31
31
|
build_index: false)
|
|
32
32
|
if block_given?
|
|
33
|
-
message = "HTS::Bcf.new()
|
|
33
|
+
message = "HTS::Bcf.new() does not take block; Please use HTS::Bcf.open() instead"
|
|
34
34
|
raise message
|
|
35
35
|
end
|
|
36
36
|
|
|
@@ -62,7 +62,7 @@ module HTS
|
|
|
62
62
|
else
|
|
63
63
|
warn "Create index for #{@file_name}"
|
|
64
64
|
end
|
|
65
|
-
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift,
|
|
65
|
+
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads || threads)
|
|
66
66
|
when 0 # successful
|
|
67
67
|
when -1 then raise "indexing failed"
|
|
68
68
|
when -2 then raise "opening #{@file_name} failed"
|
|
@@ -90,7 +90,7 @@ module HTS
|
|
|
90
90
|
end
|
|
91
91
|
|
|
92
92
|
# Destroy the loaded index handle (skipping nil and NULL handles), forget it,
# then let the superclass close the file.
def close
  if @idx
    LibHTS.hts_idx_destroy(@idx) unless @idx.null?
  end
  @idx = nil
  super
end
|
|
@@ -215,13 +215,20 @@ module HTS
|
|
|
215
215
|
raise "query is only available for BCF files" unless file_format == "bcf"
|
|
216
216
|
raise "Index file is required to call the query method." unless index_loaded?
|
|
217
217
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
218
|
+
case region
|
|
219
|
+
when Array
|
|
220
|
+
raise ArgumentError, "beg and end must not be specified when region is an Array" unless beg.nil? && end_.nil?
|
|
221
|
+
|
|
222
|
+
query_regions(region, copy:, &block)
|
|
223
223
|
else
|
|
224
|
-
|
|
224
|
+
if beg && end_
|
|
225
|
+
tid = header.name2id(region)
|
|
226
|
+
queryi(tid, beg, end_, copy:, &block)
|
|
227
|
+
elsif beg.nil? && end_.nil?
|
|
228
|
+
querys(region, copy:, &block)
|
|
229
|
+
else
|
|
230
|
+
raise ArgumentError, "beg and end must be specified together"
|
|
231
|
+
end
|
|
225
232
|
end
|
|
226
233
|
end
|
|
227
234
|
|
|
@@ -243,6 +250,14 @@ module HTS
|
|
|
243
250
|
end
|
|
244
251
|
end
|
|
245
252
|
|
|
253
|
+
# Multi-region query dispatcher: copy: true goes to query_regions_copy,
# otherwise to query_regions_reuse. The block is forwarded either way.
def query_regions(regions, copy: false, &block)
  handler = copy ? :query_regions_copy : :query_regions_reuse
  send(handler, regions, &block)
end
|
|
260
|
+
|
|
246
261
|
def queryi_reuse(tid, beg, end_, &block)
|
|
247
262
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
248
263
|
|
|
@@ -263,6 +278,15 @@ module HTS
|
|
|
263
278
|
self
|
|
264
279
|
end
|
|
265
280
|
|
|
281
|
+
# Run querys_reuse for every region in order, forwarding the block.
# Returns an Enumerator when no block is given, otherwise self.
def query_regions_reuse(regions, &block)
  if block
    regions.each { |target| querys_reuse(target, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
289
|
+
|
|
266
290
|
def query_reuse_yield(qiter)
|
|
267
291
|
bcf1 = LibHTS.bcf_init
|
|
268
292
|
record = Record.new(header, bcf1)
|
|
@@ -299,6 +323,15 @@ module HTS
|
|
|
299
323
|
self
|
|
300
324
|
end
|
|
301
325
|
|
|
326
|
+
# Run querys_copy for every region in order, forwarding the block.
# Returns an Enumerator when no block is given, otherwise self.
def query_regions_copy(regions, &block)
  if block
    regions.each { |target| querys_copy(target, &block) }
    self
  else
    to_enum(__method__, regions)
  end
end
|
|
334
|
+
|
|
302
335
|
def query_copy_yield(qiter)
|
|
303
336
|
loop do
|
|
304
337
|
bcf1 = LibHTS.bcf_init
|
data/lib/hts/faidx.rb
CHANGED
|
@@ -21,7 +21,7 @@ module HTS
|
|
|
21
21
|
|
|
22
22
|
def initialize(file_name)
|
|
23
23
|
if block_given?
|
|
24
|
-
message = "HTS::Faidx.new()
|
|
24
|
+
message = "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead"
|
|
25
25
|
raise message
|
|
26
26
|
end
|
|
27
27
|
|
data/lib/hts/hts.rb
CHANGED
data/lib/hts/libhts/constants.rb
CHANGED
|
@@ -4,7 +4,6 @@ module HTS
|
|
|
4
4
|
# Module for working with C HTSlib.
|
|
5
5
|
module LibHTS
|
|
6
6
|
typedef :int64, :hts_pos_t
|
|
7
|
-
typedef :pointer, :bam_plp_auto_f
|
|
8
7
|
|
|
9
8
|
# kstring
|
|
10
9
|
|
|
@@ -158,7 +157,7 @@ module HTS
|
|
|
158
157
|
:specific, :pointer
|
|
159
158
|
end
|
|
160
159
|
|
|
161
|
-
class HtsIdx < FFI::Struct
|
|
160
|
+
class HtsIdx < FFI::Struct
|
|
162
161
|
layout \
|
|
163
162
|
:fmt, :int,
|
|
164
163
|
:min_shift, :int,
|
|
@@ -189,9 +188,9 @@ module HTS
|
|
|
189
188
|
:n_unmapped, :uint64
|
|
190
189
|
)
|
|
191
190
|
|
|
192
|
-
def self.release(ptr)
|
|
193
|
-
|
|
194
|
-
end
|
|
191
|
+
# def self.release(ptr)
|
|
192
|
+
# LibHTS.hts_idx_destroy(ptr) unless ptr.null?
|
|
193
|
+
# end
|
|
195
194
|
end
|
|
196
195
|
|
|
197
196
|
class HtsReglist < FFI::Struct
|
|
@@ -352,6 +351,36 @@ module HTS
|
|
|
352
351
|
end
|
|
353
352
|
end
|
|
354
353
|
|
|
354
|
+
# Internal: a plain FFI::Struct with the bam1_t field layout and no release hook.
# Intended for bam1_t pointers whose memory is managed by HTSlib itself
# (e.g., pileup/mpileup), so nothing must be freed when the Ruby object is
# collected. Do not expose publicly; use only for read-only access.
class Bam1View < FFI::Struct
  layout(
    :core, Bam1Core,
    :id, :uint64,
    :data, :pointer, # uint8_t
    :l_data, :int,
    :m_data, :uint32,
    :_mempolicy, :uint32 # bit_fields
  )
end
|
|
366
|
+
|
|
367
|
+
# One base modification entry: four int fields (modified base, canonical base,
# strand and quality).
class HtsBaseMod < FFI::Struct
  layout(
    :modified_base, :int,
    :canonical_base, :int,
    :strand, :int,
    :qual, :int
  )
end
|
|
375
|
+
|
|
376
|
+
# Opaque handle for the base modification state.
# Modelled as an AutoPointer because the structure's fields are never read
# from Ruby; only the custom release hook is needed.
class HtsBaseModState < FFI::AutoPointer
  # Free the native state unless the pointer is NULL.
  def self.release(ptr)
    return if ptr.null?

    LibHTS.hts_base_mod_state_free(ptr)
  end
end
|
|
383
|
+
|
|
355
384
|
typedef :pointer, :bam_plp
|
|
356
385
|
typedef :pointer, :bam_mplp
|
|
357
386
|
|
|
@@ -364,7 +393,7 @@ module HTS
|
|
|
364
393
|
|
|
365
394
|
class BamPileup1 < FFI::BitStruct
|
|
366
395
|
layout \
|
|
367
|
-
:b,
|
|
396
|
+
:b, :pointer,
|
|
368
397
|
:qpos, :int32,
|
|
369
398
|
:indel, :int,
|
|
370
399
|
:level, :int,
|
|
@@ -379,6 +408,10 @@ module HTS
|
|
|
379
408
|
:is_refskip, 1,
|
|
380
409
|
:_reserved, 1,
|
|
381
410
|
:aux, 27
|
|
411
|
+
|
|
412
|
+
# def self.release(ptr)
|
|
413
|
+
# LibHTS.bam_plp_destroy(ptr) unless ptr.null?
|
|
414
|
+
# end
|
|
382
415
|
end
|
|
383
416
|
|
|
384
417
|
class TbxConf < FFI::Struct
|