htslib 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -2
- data/lib/hts/bam/base_mod.rb +343 -0
- data/lib/hts/bam/header.rb +17 -0
- data/lib/hts/bam/mpileup.rb +175 -0
- data/lib/hts/bam/pileup.rb +201 -0
- data/lib/hts/bam/record.rb +13 -1
- data/lib/hts/bam.rb +97 -11
- data/lib/hts/bcf.rb +39 -6
- data/lib/hts/hts.rb +1 -1
- data/lib/hts/libhts/constants.rb +31 -2
- data/lib/hts/libhts/sam.rb +34 -23
- data/lib/hts/libhts.rb +6 -0
- data/lib/hts/tabix.rb +21 -2
- data/lib/hts/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e1bf158506931c62ffae1a524158de9fbb451796a68a7586cf788203f67a8cc4
|
|
4
|
+
data.tar.gz: d3289551dac8783cfa23f1f8d44e1b4be44b6dab3d6369f816491ceea653188f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d1c316a599c2dc08e6589f980e9dd4ae6d8d6bababbfef424f35e5c25453b667bdc48570578e88a9eba142553fffb25b49ae4de54a061f99f7cbf286988ec618
|
|
7
|
+
data.tar.gz: 8529c6a02c354419dd722abc0bd6f27ec514ffe980f4b0d7014914de11551a45c4e96b803bc77255c3b423129a8743a3c39556d9d3d44ec6c6692556485379cf
|
data/README.md
CHANGED
|
@@ -165,8 +165,6 @@ Try Crystal. [HTS.cr](https://github.com/bio-cr/hts.cr) is implemented in Crysta
|
|
|
165
165
|
|
|
166
166
|
## Development
|
|
167
167
|
|
|
168
|
-

|
|
169
|
-
|
|
170
168
|
#### Compile from source code
|
|
171
169
|
|
|
172
170
|
[GNU Autotools](https://en.wikipedia.org/wiki/GNU_Autotools) is required to compile htslib.
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# Base modification information from MM/ML tags
|
|
6
|
+
#
|
|
7
|
+
# This class provides access to DNA/RNA base modifications such as methylation.
|
|
8
|
+
# It wraps the htslib base modification API and provides a Ruby-friendly interface.
|
|
9
|
+
#
|
|
10
|
+
# @note BaseMod is a view object that references data in a Record.
|
|
11
|
+
# The state is maintained in hts_base_mod_state structure.
|
|
12
|
+
class BaseMod
|
|
13
|
+
include Enumerable
|
|
14
|
+
|
|
15
|
+
class NotParsedError < StandardError; end
|
|
16
|
+
|
|
17
|
+
attr_reader :record
|
|
18
|
+
|
|
19
|
+
# One base modification call for a single query position.
#
# Holds the four values copied out of an hts_base_mod entry and offers
# character/probability views of them.
class Modification
  attr_reader :modified_base, :canonical_base, :strand, :qual

  # @param modified_base [Integer] Modification code as char or -ChEBI
  # @param canonical_base [Integer] Canonical base (A, C, G, T, N)
  # @param strand [Integer] 0 or 1 for +/- strand
  # @param qual [Integer] Quality (256*probability), or a negative value
  #   when no likelihood is available (-1 if unknown)
  def initialize(modified_base:, canonical_base:, strand:, qual:)
    @modified_base = modified_base
    @canonical_base = canonical_base
    @strand = strand
    @qual = qual
  end

  # Get modification code as character or ChEBI number as string
  # @return [String] Single character code for positive codes; otherwise the
  #   stored integer rendered with #to_s (sign included)
  def code
    @modified_base > 0 ? @modified_base.chr : @modified_base.to_s
  end

  # Get canonical base as character
  # @return [String] Single character (A, C, G, T, N)
  def canonical
    @canonical_base.chr
  end

  # Get likelihood as a probability (0.0-1.0)
  # @return [Float, nil] qual / 256.0, or nil when qual is negative
  def probability
    # Every negative qual is a "no likelihood" marker, not just -1.
    # NOTE(review): htslib also defines HTS_MOD_UNCHECKED (-2); confirm
    # against sam.h. Dividing it would yield a negative "probability".
    return nil if @qual.negative?

    @qual / 256.0
  end

  # Convert to hash representation
  # @return [Hash] Raw fields plus the derived code, canonical and probability
  def to_h
    {
      modified_base: @modified_base,
      code: code,
      canonical_base: @canonical_base,
      canonical: canonical,
      strand: @strand,
      qual: @qual,
      probability: probability
    }
  end

  # String representation
  # @return [String] "C->m(0.5)" style text; the parenthesised probability
  #   is omitted when no likelihood is available
  def to_s
    prob = probability
    prob ? "#{canonical}->#{code}(#{prob.round(3)})" : "#{canonical}->#{code}"
  end

  # Inspect string
  # @return [String] Inspect string
  def inspect
    "#<HTS::Bam::BaseMod::Modification #{self}>"
  end
end
|
|
84
|
+
|
|
85
|
+
# All modifications reported for one position of the query sequence.
class Position
  attr_reader :position, :modifications

  # @param position [Integer] Position in query sequence
  # @param modifications [Array<Modification>] Array of modifications at this position
  def initialize(position, modifications)
    @position = position
    @modifications = modifications
  end

  # Check if this position has methylation
  # @return [Boolean] true if any modification is methylation ('m')
  def methylated?
    code_present?("m")
  end

  # Check if this position has hydroxymethylation
  # @return [Boolean] true if any modification is hydroxymethylation ('h')
  def hydroxymethylated?
    code_present?("h")
  end

  # Convert to hash representation
  # @return [Hash] The position plus each modification converted with #to_h
  def to_h
    { position: @position, modifications: @modifications.map(&:to_h) }
  end

  # String representation
  # @return [String] "pos=N [mod, mod, ...]"
  def to_s
    rendered = @modifications.map(&:to_s)
    "pos=#{@position} [#{rendered.join(', ')}]"
  end

  # Inspect string
  # @return [String] Inspect string
  def inspect
    "#<HTS::Bam::BaseMod::Position #{self}>"
  end

  private

  # True when at least one modification reports the given code string.
  def code_present?(wanted)
    @modifications.any? { |mod| mod.code == wanted }
  end
end
|
|
130
|
+
|
|
131
|
+
# Create a BaseMod view over a record and allocate its htslib state.
# @param record [Record] The BAM record to extract modifications from
# @param auto_parse [Boolean] If true, parse MM/ML lazily on first access
# @raise [Error] If hts_base_mod_state_alloc returns a NULL state
def initialize(record, auto_parse: true)
  @record = record
  @auto_parse = auto_parse ? true : false
  @parsed = false
  @closed = false
  @state = LibHTS.hts_base_mod_state_alloc
  raise Error, "Failed to allocate hts_base_mod_state" if @state.null?
end
|
|
142
|
+
|
|
143
|
+
# Release this object's reference to the htslib state.
#
# Only the Ruby reference is dropped; nothing is freed by hand here. The
# original note says the state is an AutoPointer, so dropping the reference
# is enough and a manual free would risk a double-free.
# @return [void]
def close
  unless @closed
    @state = nil
    @closed = true
  end
end
|
|
153
|
+
|
|
154
|
+
# Report whether #parse has already completed successfully on this object.
# @return [Boolean]
def parsed?
  @parsed
end
|
|
159
|
+
|
|
160
|
+
# Make sure MM/ML have been parsed before state-dependent calls.
#
# Does nothing when already parsed. Otherwise parses now if auto_parse is
# enabled, and raises if it is disabled.
# @param flags [Integer] Passed through to #parse when a lazy parse happens
# @return [void]
# @raise [NotParsedError] If not parsed yet and auto_parse is disabled
def ensure_parsed!(flags = 0)
  return if @parsed
  return parse(flags) if @auto_parse

  raise NotParsedError, "BaseMod is not parsed. Call #parse first (auto_parse is disabled)."
end
|
|
170
|
+
|
|
171
|
+
# Parse MM and ML tags from the record
#
# Calling this again re-runs the parse; #at_pos and #each_position rely on
# that to reset the state before querying.
# @param flags [Integer] Parsing flags (default: 0)
# @return [Integer] Non-negative value returned by bam_parse_basemod2
# @raise [Error] If this object has been closed, or if parsing fails
def parse(flags = 0)
  # #close drops @state, so there is no state left to hand to htslib.
  # NOTE(review): a nil state would reach the C call as NULL; htslib is not
  # expected to tolerate that.
  raise Error, "BaseMod is closed" if @closed

  ret = LibHTS.bam_parse_basemod2(@record.struct, @state, flags)
  raise Error, "Failed to parse base modifications" if ret < 0

  @parsed = true
  ret
end
|
|
182
|
+
|
|
183
|
+
# Look up the modifications reported at one query position.
# @param position [Integer] Query position (0-based)
# @param max_mods [Integer] Maximum number of modifications to retrieve
# @return [Position, nil] Position object with modifications, or nil if none
def at_pos(position, max_mods: 10)
  # Re-parse when already parsed so the lookup starts from a fresh state,
  # giving deterministic results even after a prior iteration.
  if parsed?
    parse
  else
    ensure_parsed!
  end

  buffer = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)
  found = LibHTS.bam_mods_at_qpos(@record.struct, position, @state, buffer, max_mods)
  return nil unless found.positive?

  # Never read more entries than the buffer was allocated for.
  build_position(position, buffer, [found, max_mods].min)
end
|
|
199
|
+
|
|
200
|
+
# Index-style shorthand for #at_pos with the default max_mods.
# @param position [Integer] Query position (0-based)
# @return [Position, nil] Position object with modifications, or nil if none
def [](position)
  at_pos(position)
end
|
|
206
|
+
|
|
207
|
+
# Walk every query position that carries at least one modification.
# @param max_mods [Integer] Maximum number of modifications per position
# @yield [Position] Position object for each modified position
# @return [Enumerator] If no block given
def each_position(max_mods: 10)
  return enum_for(__method__, max_mods: max_mods) unless block_given?

  # Re-parse when already parsed so repeated enumerations start over.
  if parsed?
    parse
  else
    ensure_parsed!
  end

  qpos_out = FFI::MemoryPointer.new(:int)
  buffer = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)

  loop do
    found = LibHTS.bam_next_basemod(@record.struct, @state, buffer, max_mods, qpos_out)
    # Zero or negative: stop iterating.
    break if found <= 0

    yield build_position(qpos_out.read_int, buffer, [found, max_mods].min)
  end
end
|
|
229
|
+
|
|
230
|
+
alias each each_position
|
|
231
|
+
|
|
232
|
+
# List the modification type codes htslib recorded for this record.
# @return [Array<Integer>] Array of modification codes (char code or -ChEBI);
#   empty when no types are reported
def modification_types
  ensure_parsed!

  count_out = FFI::MemoryPointer.new(:int)
  codes = LibHTS.bam_mods_recorded(@state, count_out)
  count = count_out.read_int

  if count.positive? && !codes.null?
    codes.read_array_of_int(count)
  else
    []
  end
end
|
|
245
|
+
|
|
246
|
+
alias recorded_types modification_types
|
|
247
|
+
|
|
248
|
+
# Look up the properties of one modification type by its code.
# @param code [Integer, String] Modification code (char code or -ChEBI, or single char string)
# @return [Hash, nil] Hash with canonical, strand, implicit info, or nil if not found
def query_type(code)
  ensure_parsed!

  # A String is reduced to the code point of its first character.
  numeric_code = code.is_a?(String) ? code.ord : code

  strand_out = FFI::MemoryPointer.new(:int)
  implicit_out = FFI::MemoryPointer.new(:int)
  canonical_out = FFI::MemoryPointer.new(:char, 1)

  status = LibHTS.bam_mods_query_type(@state, numeric_code, strand_out, implicit_out, canonical_out)
  return nil if status.negative?

  {
    canonical: canonical_out.read_char.chr,
    strand: strand_out.read_int,
    implicit: implicit_out.read_int != 0
  }
end
|
|
270
|
+
|
|
271
|
+
# Look up the properties of the modification type at a given index.
# @param index [Integer] Modification type index (0-based)
# @return [Hash, nil] Hash with code, canonical, strand, implicit info,
#   or nil when bam_mods_queryi reports failure
def query_type_at(index)
  ensure_parsed!

  strand_out = FFI::MemoryPointer.new(:int)
  implicit_out = FFI::MemoryPointer.new(:int)
  canonical_out = FFI::MemoryPointer.new(:char, 1)

  status = LibHTS.bam_mods_queryi(@state, index, strand_out, implicit_out, canonical_out)
  return nil if status.negative?

  # The code itself is taken from the recorded-types list at the same index.
  recorded = modification_types
  {
    code: recorded[index],
    canonical: canonical_out.read_char.chr,
    strand: strand_out.read_int,
    implicit: implicit_out.read_int != 0
  }
end
|
|
293
|
+
|
|
294
|
+
# Collect every modified position into an array.
# @return [Array<Position>] Array of all positions with modifications
def to_a
  each_position.to_a
end
|
|
299
|
+
|
|
300
|
+
# String representation for debugging
|
|
301
|
+
# @return [String] String representation
|
|
302
|
+
def to_s
|
|
303
|
+
return "#<HTS::Bam::BaseMod (not parsed)>" unless @parsed
|
|
304
|
+
|
|
305
|
+
mods = []
|
|
306
|
+
each_position do |pos|
|
|
307
|
+
mods << pos.to_s
|
|
308
|
+
end
|
|
309
|
+
"#<HTS::Bam::BaseMod #{mods.join(' ')}>"
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
# Inspect string
|
|
313
|
+
# @return [String] Inspect string
|
|
314
|
+
def inspect
|
|
315
|
+
to_s
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
private
|
|
319
|
+
|
|
320
|
+
# Turn the first n_mods entries of an hts_base_mod array into a Position.
# @param position [Integer] Query position
# @param mods_ptr [FFI::Pointer] Pointer to array of HtsBaseMod structures
# @param n_mods [Integer] Number of modifications
# @return [Position] Position object
def build_position(position, mods_ptr, n_mods)
  entry_size = LibHTS::HtsBaseMod.size

  modifications = Array.new(n_mods) do |index|
    # Entries are laid out back to back, so step by the struct size.
    entry = LibHTS::HtsBaseMod.new(mods_ptr + index * entry_size)
    Modification.new(
      modified_base: entry[:modified_base],
      canonical_base: entry[:canonical_base],
      strand: entry[:strand],
      qual: entry[:qual]
    )
  end

  Position.new(position, modifications)
end
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
end
|
data/lib/hts/bam/header.rb
CHANGED
|
@@ -111,6 +111,23 @@ module HTS
|
|
|
111
111
|
name2tid(name)
|
|
112
112
|
end
|
|
113
113
|
|
|
114
|
+
# Add a @PG (program) line to the header
#
# Thin wrapper around the variadic sam_hdr_add_pg: each option becomes a
# (key, value) string pair, and the argument list ends with a NULL pointer.
# Per the original notes, sam_hdr_add_pg generates a unique ID if the given
# one clashes and manages PP (previous program) chains itself.
#
# @param program_name [String] Name of the program
# @param options [Hash] Key-value pairs for @PG tags (ID, PN, VN, CL, PP, etc.)
# @return [Integer] 0 on success, -1 on failure
#
# @example
#   header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
#   header.add_pg("samtools", VN: "1.15", PP: "bwa")
def add_pg(program_name, **options)
  tag_args = options.each_with_object([]) do |(tag, value), acc|
    acc.push(:string, tag.to_s, :string, value.to_s)
  end
  LibHTS.sam_hdr_add_pg(@sam_hdr, program_name, *tag_args, :pointer, FFI::Pointer::NULL)
end
|
|
130
|
+
|
|
114
131
|
private
|
|
115
132
|
|
|
116
133
|
def name2tid(name)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level mpileup iterator over multiple BAM/CRAM inputs
|
|
6
|
+
class Mpileup
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# Build an Mpileup, optionally scoped to a block.
#
# With a block, the instance is yielded and closed afterwards, even if the
# block raises. The instance itself is returned, not the block's value.
#
# Usage:
#   HTS::Bam::Mpileup.open([bam1, bam2], region: "chr1:1-100") do |mpl|
#     mpl.each { |cols| ... }
#   end
def self.open(*args, **kw)
  mpileup = new(*args, **kw)
  if block_given?
    begin
      yield mpileup
    ensure
      mpileup.close
    end
  end
  mpileup
end
|
|
24
|
+
|
|
25
|
+
# Build an mpileup iterator over one or more alignment inputs.
#
# Each element of inputs is either an HTS::Bam, which #close leaves open, or
# a String path, which is opened here and closed again by #close.
#
# If any step fails after resources were acquired, everything acquired so
# far is released through #close before the error is re-raised. Without
# that, the caller would have no object on which to call #close.
#
# @param inputs [Array<HTS::Bam, String>] Non-empty list of inputs
# @param region [String, nil] Region string; when beg and end_ are also
#   given it is looked up as a reference name via header.get_tid
# @param beg [Integer, nil] Begin coordinate (only together with end_)
# @param end_ [Integer, nil] End coordinate (only together with beg)
# @param maxcnt [Integer, nil] Forwarded to bam_mplp_set_maxcnt when given
# @param overlaps [Boolean] When truthy, bam_mplp_init_overlaps is called
# @raise [ArgumentError] On empty inputs, an unsupported input type, or
#   beg/end_ given in a combination that matches no query form
# @raise [RuntimeError] When an index is missing or an htslib call fails
def initialize(inputs, region: nil, beg: nil, end_: nil, maxcnt: nil, overlaps: false)
  raise ArgumentError, "inputs must be non-empty" if inputs.nil? || inputs.empty?

  # Everything #close walks is set up before the first fallible step, so the
  # rescue below can always call #close.
  @owned_bams = [] # Bams we opened here; will be closed on close
  @iters = []
  @data_blocks = [] # per-input packed pointers kept alive
  @iter = nil

  begin
    @bams = inputs.map do |x|
      case x
      when HTS::Bam
        x
      when String
        b = HTS::Bam.open(x)
        @owned_bams << b
        b
      else
        raise ArgumentError, "Unsupported input type: #{x.class}"
      end
    end

    n = @bams.length

    # Prepare optional region iterators for each input
    @bams.each_with_index do |bam, i|
      itr = nil
      if region && beg.nil? && end_.nil?
        raise "Index required for region mpileup" unless bam.index_loaded?

        itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), bam.header.struct, region)
        raise "Failed to query region on input ##{i}: #{region}" if itr.null?
      elsif region && beg && end_
        raise "Index required for region mpileup" unless bam.index_loaded?

        tid = bam.header.get_tid(region)
        itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
        raise "Failed to query region on input ##{i}: #{region} #{beg} #{end_}" if itr.null?
      elsif beg || end_
        raise ArgumentError, "beg and end_ must be specified together"
      end
      @iters << itr
    end

    # Build per-input packed pointer blocks so C passes them back to the callback.
    # Layout per input: [0] hts_fp (htsFile*), [1] hdr_struct (bam_hdr_t*), [2] itr (hts_itr_t* or NULL)
    ptr_size = FFI.type_size(:pointer)
    data_array = FFI::MemoryPointer.new(:pointer, n)
    @bams.each_with_index do |bam, i|
      hts_fp = bam.instance_variable_get(:@hts_file)
      hdr_struct = bam.header.struct
      itr = @iters[i]
      block = FFI::MemoryPointer.new(:pointer, 3)
      block.put_pointer(0 * ptr_size, hts_fp)
      block.put_pointer(1 * ptr_size, hdr_struct)
      block.put_pointer(2 * ptr_size, itr && !itr.null? ? itr : FFI::Pointer::NULL)
      @data_blocks << block
      data_array.put_pointer(i * ptr_size, block)
    end
    # Keep the array of per-input blocks alive while the C side holds on to them
    @data_array = data_array

    @cb = FFI::Function.new(:int, %i[pointer pointer]) do |data, b|
      # Unpack pointers from the per-input block
      hts_fp = data.get_pointer(0 * ptr_size)
      hdr_struct = data.get_pointer(1 * ptr_size)
      itr = data.get_pointer(2 * ptr_size)
      # HTSlib contract: return same as sam_itr_next/sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
      if itr && !itr.null?
        HTS::LibHTS.sam_itr_next(hts_fp, itr, b)
      else
        HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
      end
    end

    @iter = HTS::LibHTS.bam_mplp_init(n, @cb, @data_array)
    raise "bam_mplp_init failed" if @iter.null?

    HTS::LibHTS.bam_mplp_set_maxcnt(@iter, maxcnt) if maxcnt

    if overlaps
      rc = HTS::LibHTS.bam_mplp_init_overlaps(@iter)
      raise "bam_mplp_init_overlaps failed" if rc < 0
    end
  rescue StandardError
    # Release iterators and any files opened above, then re-raise unchanged.
    close
    raise
  end
end
|
|
108
|
+
|
|
109
|
+
# Yields an array of Pileup::PileupColumn (one per input) for each position
#
# An input with no reads at the position still gets a column, with an empty
# alignments array.
#
# @yield [Array<HTS::Bam::Pileup::PileupColumn>] Columns in input order
# @return [Enumerator] If no block given
# @return [self] After iteration finishes
# @raise [RuntimeError] If bam_mplp64_auto returns a negative value
def each
  return to_enum(__method__) unless block_given?

  n = @bams.length
  tid_ptr = FFI::MemoryPointer.new(:int)
  pos_ptr = FFI::MemoryPointer.new(:long_long)
  n_ptr = FFI::MemoryPointer.new(:int, n)
  plp_ptr = FFI::MemoryPointer.new(:pointer, n)
  plp1_size = HTS::LibHTS::BamPileup1.size
  headers = @bams.map(&:header)

  status = 0
  while (status = HTS::LibHTS.bam_mplp64_auto(@iter, tid_ptr, pos_ptr, n_ptr, plp_ptr)).positive?
    tid = tid_ptr.read_int
    pos = pos_ptr.read_long_long
    counts = n_ptr.read_array_of_int(n)
    plp_arr = plp_ptr.read_array_of_pointer(n)

    cols = Array.new(n) do |i|
      depth = counts[i]
      base_ptr = plp_arr[i]
      aligns =
        if depth <= 0 || base_ptr.null?
          []
        else
          Array.new(depth) do |j|
            # Entries are contiguous bam_pileup1_t structs.
            entry = HTS::LibHTS::BamPileup1.new(base_ptr + (j * plp1_size))
            HTS::Bam::Pileup::PileupRecord.new(entry, headers[i])
          end
        end
      HTS::Bam::Pileup::PileupColumn.new(tid: tid, pos: pos, alignments: aligns)
    end

    yield cols
  end

  # Zero ends the iteration normally. A negative value is reported instead of
  # being treated as end of data, matching how Pileup#each handles errors.
  raise "HTSlib mpileup error (bam_mplp64_auto)" if status.negative?

  self
end
|
|
154
|
+
|
|
155
|
+
# Release the mpileup handle, the region iterators, and any Bam files that
# this object opened itself. Bam objects passed in by the caller are left
# open. Safe to call more than once.
def close
  if @iter && !@iter.null?
    HTS::LibHTS.bam_mplp_destroy(@iter)
    @iter = FFI::Pointer::NULL
  end

  @iters.each { |itr| HTS::LibHTS.hts_itr_destroy(itr) if itr && !itr.null? }
  @iters.clear

  # Keep references to callback and data blocks to prevent GC
  @_keepalive = [@cb, @data_array, *@data_blocks]

  # Close owned bams opened by this object; a failure on one file must not
  # stop the remaining files from being closed.
  @owned_bams.each do |bam|
    begin
      bam.close
    rescue StandardError
      # best effort
    end
  end
  @owned_bams.clear
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level pileup iterator for a single SAM/BAM/CRAM
|
|
6
|
+
class Pileup
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# Build a Pileup, optionally scoped to a block.
#
# With a block, the instance is yielded and closed afterwards, even if the
# block raises. The instance itself is returned, not the block's value.
#
# Usage:
#   HTS::Bam::Pileup.open(bam, region: "chr1:1-100") do |pl|
#     pl.each { |col| ... }
#   end
def self.open(*args, **kw)
  pileup = new(*args, **kw)
  if block_given?
    begin
      yield pileup
    ensure
      pileup.close
    end
  end
  pileup
end
|
|
24
|
+
|
|
25
|
+
# One reference position together with the pileup entries covering it.
# Built with keywords: tid, pos, alignments.
PileupColumn = Struct.new(:tid, :pos, :alignments, keyword_init: true) do
  # @return [Integer] Number of entries in alignments
  def depth
    alignments.length
  end
end
|
|
31
|
+
|
|
32
|
+
# Read-only view of one bam_pileup1_t entry, paired with the header needed
# to build a Bam::Record from it.
class PileupRecord
  def initialize(entry, header)
    @entry = entry
    @header = header
    @record = nil
  end

  # Return Bam::Record. The first call duplicates the underlying bam1_t with
  # bam_dup1 so the record stays valid beyond the current pileup step; later
  # calls return the same cached instance.
  # NOTE: Without duplication, bam1_t memory may be reused by HTSlib on the next step.
  # @raise [RuntimeError] If bam_dup1 returns a NULL pointer
  def record
    @record ||= begin
      # The :b field may come back as a raw pointer or as a wrapper object;
      # reduce it to a raw pointer either way.
      raw = @entry[:b]
      source = raw.is_a?(FFI::Pointer) ? raw : raw.to_ptr
      copy = HTS::LibHTS.bam_dup1(source)
      raise "bam_dup1 failed" if copy.null?

      HTS::Bam::Record.new(@header, copy)
    end
  end

  # @return [Integer] The entry's qpos field
  def query_position
    @entry[:qpos]
  end

  # @return [Integer] The entry's indel field
  def indel
    @entry[:indel]
  end

  # @return [Boolean] true when the is_del field equals 1
  def del?
    flag_set?(:is_del)
  end

  # @return [Boolean] true when the is_head field equals 1
  def head?
    flag_set?(:is_head)
  end

  # @return [Boolean] true when the is_tail field equals 1
  def tail?
    flag_set?(:is_tail)
  end

  # @return [Boolean] true when the is_refskip field equals 1
  def refskip?
    flag_set?(:is_refskip)
  end

  private

  # True when the named entry field holds exactly 1.
  def flag_set?(field)
    @entry[field] == 1
  end
end
|
|
80
|
+
|
|
81
|
+
# Create a Pileup iterator
#
# If a step fails after the region iterator or pileup handle was created,
# they are released through #close before the error is re-raised. Without
# that, the caller would have no object on which to call #close.
#
# @param bam [HTS::Bam]
# @param region [String, nil] Optional region string (requires index); when
#   beg and end_ are also given it is looked up as a reference name
# @param beg [Integer, nil] Optional begin when using tid/beg/end form
# @param end_ [Integer, nil] Optional end when using tid/beg/end form
# @param maxcnt [Integer, nil] Max per-position depth (capped)
# @raise [ArgumentError] If beg/end_ are given in a combination that matches
#   no query form
# @raise [RuntimeError] If the index is missing or an htslib call fails
def initialize(bam, region: nil, beg: nil, end_: nil, maxcnt: nil)
  @bam = bam
  @header = bam.header
  @itr = nil
  @cb = nil
  @plp = nil

  begin
    # Optional region iterator
    if region && beg.nil? && end_.nil?
      raise "Index file is required to use region pileup." unless bam.index_loaded?

      @itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), @header.struct, region)
      raise "Failed to query region: #{region}" if @itr.null?
    elsif region && beg && end_
      raise "Index file is required to use region pileup." unless bam.index_loaded?

      tid = @header.get_tid(region)
      @itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
      raise "Failed to query region: #{region} #{beg} #{end_}" if @itr.null?
    elsif beg || end_
      raise ArgumentError, "beg and end_ must be specified together"
    end

    # Build the auto callback for bam_plp_init (micro-optimized)
    # - Hoist ivar/constant lookups out of the callback to reduce per-call overhead.
    # - Specialize callbacks to avoid branching in the hot path.
    hts_fp = @bam.instance_variable_get(:@hts_file)
    hdr_struct = @header.struct
    itr_local = @itr

    @cb = if itr_local && !itr_local.null?
            FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
              # HTSlib contract: return same as sam_itr_next (>= 0 on success, -1 on EOF, < -1 on error)
              HTS::LibHTS.sam_itr_next(hts_fp, itr_local, b)
            end
          else
            FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
              # HTSlib contract: return same as sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
              HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
            end
          end

    @plp = HTS::LibHTS.bam_plp_init(@cb, nil)
    raise "bam_plp_init failed" if @plp.null?

    HTS::LibHTS.bam_plp_set_maxcnt(@plp, maxcnt) if maxcnt
  rescue StandardError
    # Release whatever was acquired above, then re-raise unchanged.
    close
    raise
  end
end
|
|
134
|
+
|
|
135
|
+
# Yield one PileupColumn per position reported by bam_plp64_auto.
#
# @yield [PileupColumn] Column for the current position
# @return [Enumerator] If no block given
# @return [self] After iteration finishes
# @raise [RuntimeError] If htslib reports a pileup error
def each
  return to_enum(__method__) unless block_given?

  tid_out = FFI::MemoryPointer.new(:int)
  pos_out = FFI::MemoryPointer.new(:long_long) # hts_pos_t
  count_out = FFI::MemoryPointer.new(:int)

  # Looked up once, outside the loop.
  entry_size = HTS::LibHTS::BamPileup1.size
  header = @header

  loop do
    entries = HTS::LibHTS.bam_plp64_auto(@plp, tid_out, pos_out, count_out)

    # When entries is NULL, check n to distinguish EOF (n == 0) from error (n < 0)
    if entries.null?
      raise "HTSlib pileup error (bam_plp64_auto)" if count_out.read_int < 0

      break
    end

    tid = tid_out.read_int
    pos = pos_out.read_long_long
    depth = count_out.read_int

    # Entries are contiguous bam_pileup1_t structs; a depth of zero gives [].
    alignments = Array.new(depth) do |index|
      entry = HTS::LibHTS::BamPileup1.new(entries + (index * entry_size))
      PileupRecord.new(entry, header)
    end

    yield PileupColumn.new(tid: tid, pos: pos, alignments: alignments)
  end

  self
end
|
|
182
|
+
|
|
183
|
+
# Call bam_plp_reset on the pileup handle. Does nothing when there is no
# live handle (never created, or already closed).
def reset
  return unless @plp && !@plp.null?

  HTS::LibHTS.bam_plp_reset(@plp)
end
|
|
186
|
+
|
|
187
|
+
# Destroy the pileup handle and the region iterator, if they exist.
#
# Both are replaced by a NULL pointer afterwards, so a second call does
# nothing. The Bam passed to the constructor is not closed here.
# @return [FFI::Function, nil] The read callback (last expression of the method)
def close
  if @plp && !@plp.null?
    HTS::LibHTS.bam_plp_destroy(@plp)
    @plp = FFI::Pointer::NULL
  end

  if @itr && !@itr.null?
    HTS::LibHTS.hts_itr_destroy(@itr)
    @itr = FFI::Pointer::NULL
  end

  # Keep @cb referenced by instance to avoid GC during iteration.
  @cb
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
end
|
data/lib/hts/bam/record.rb
CHANGED
|
@@ -326,6 +326,13 @@ module HTS
|
|
|
326
326
|
end
|
|
327
327
|
end
|
|
328
328
|
|
|
329
|
+
# Build a BaseMod view over this record's MM/ML tags.
#
# A new BaseMod is created on every call.
# @param auto_parse [Boolean] If true (default), parse lazily on first access
# @return [BaseMod] Base modification object
def base_mod(auto_parse: true)
  BaseMod.new(self, auto_parse: auto_parse)
end
|
|
335
|
+
|
|
329
336
|
# TODO: add a method to get the auxiliary fields as a hash.
|
|
330
337
|
|
|
331
338
|
# TODO: add a method to set the auxiliary fields.
|
|
@@ -352,8 +359,13 @@ module HTS
|
|
|
352
359
|
private
|
|
353
360
|
|
|
354
361
|
# Copy hook used by dup/clone: the copy keeps the same header object but
# gets its own bam1_t, duplicated with bam_dup1.
# @param orig [Record] The record being copied
# @raise [RuntimeError] If bam_dup1 returns a NULL pointer
def initialize_copy(orig)
  super
  @header = orig.header

  # Deep-copy underlying bam1_t to detach from original buffer
  copied = LibHTS.bam_dup1(orig.struct)
  raise "bam_dup1 failed" if copied.null?

  @bam1 = copied
end
|
|
358
370
|
end
|
|
359
371
|
end
|
data/lib/hts/bam.rb
CHANGED
|
@@ -7,7 +7,9 @@ require_relative "bam/header"
|
|
|
7
7
|
require_relative "bam/cigar"
|
|
8
8
|
require_relative "bam/flag"
|
|
9
9
|
require_relative "bam/record"
|
|
10
|
-
|
|
10
|
+
require_relative "bam/base_mod"
|
|
11
|
+
require_relative "bam/pileup"
|
|
12
|
+
require_relative "bam/mpileup"
|
|
11
13
|
# require_relative "bam/pileup_entry"
|
|
12
14
|
|
|
13
15
|
module HTS
|
|
@@ -160,7 +162,7 @@ module HTS
|
|
|
160
162
|
|
|
161
163
|
position = tell
|
|
162
164
|
ary = map { |r| r.aux(tag) }
|
|
163
|
-
seek(position)
|
|
165
|
+
seek(position) if position
|
|
164
166
|
ary
|
|
165
167
|
end
|
|
166
168
|
|
|
@@ -194,6 +196,13 @@ module HTS
|
|
|
194
196
|
self
|
|
195
197
|
end
|
|
196
198
|
|
|
199
|
+
# Iterate alignment records in this file.
|
|
200
|
+
#
|
|
201
|
+
# Performance and memory semantics:
|
|
202
|
+
# - copy: false (default) reuses a single Record instance and its underlying bam1_t buffer.
|
|
203
|
+
# The yielded Record MUST NOT be stored beyond the block; its content will be overwritten
|
|
204
|
+
# by the next iteration. If you need to retain it, call `rec = rec.dup`.
|
|
205
|
+
# - copy: true yields a fresh Record per iteration (deep-copied via bam_dup1). Slower, safe to keep.
|
|
197
206
|
def each(copy: false, &block)
|
|
198
207
|
if copy
|
|
199
208
|
each_record_copy(&block)
|
|
@@ -202,23 +211,65 @@ module HTS
|
|
|
202
211
|
end
|
|
203
212
|
end
|
|
204
213
|
|
|
214
|
+
# Iterate records in a genomic region or multiple regions.
|
|
215
|
+
# See {#each} for copy semantics. When copy: false, the yielded Record is reused and should not be stored.
|
|
216
|
+
#
|
|
217
|
+
# @param region [String, Array<String>] Region specification(s)
|
|
218
|
+
# - Single region: "chr1:100-200" or "chr1" with beg/end parameters
|
|
219
|
+
# - Multiple regions: ["chr1:100-200", "chr2:500-600", ...]
|
|
220
|
+
# @param beg [Integer, nil] Start position (used with single string region)
|
|
221
|
+
# @param end_ [Integer, nil] End position (used with single string region)
|
|
222
|
+
# @param copy [Boolean] Whether to deep-copy records (see {#each})
|
|
223
|
+
#
|
|
224
|
+
# @example Single region query
|
|
225
|
+
# bam.query("chr1:100-200") { |r| puts r.qname }
|
|
226
|
+
# bam.query("chr1", 100, 200) { |r| puts r.qname }
|
|
227
|
+
#
|
|
228
|
+
# @example Multi-region query
|
|
229
|
+
# bam.query(["chr1:100-200", "chr2:500-600"]) { |r| puts r.qname }
|
|
205
230
|
def query(region, beg = nil, end_ = nil, copy: false, &block)
|
|
206
231
|
check_closed
|
|
207
232
|
raise "Index file is required to call the query method." unless index_loaded?
|
|
208
233
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
234
|
+
case region
|
|
235
|
+
when Array
|
|
236
|
+
raise ArgumentError, "beg and end_ cannot be used with array of regions" if beg || end_
|
|
237
|
+
|
|
238
|
+
query_regions(region, copy:, &block)
|
|
239
|
+
when String
|
|
240
|
+
if beg && end_
|
|
241
|
+
tid = header.get_tid(region)
|
|
242
|
+
queryi(tid, beg, end_, copy:, &block)
|
|
243
|
+
elsif beg.nil? && end_.nil?
|
|
244
|
+
querys(region, copy:, &block)
|
|
245
|
+
else
|
|
246
|
+
raise ArgumentError, "beg and end_ must be specified together"
|
|
247
|
+
end
|
|
214
248
|
else
|
|
215
|
-
raise ArgumentError, "
|
|
249
|
+
raise ArgumentError, "region must be String or Array"
|
|
216
250
|
end
|
|
217
251
|
end
|
|
218
252
|
|
|
219
|
-
#
|
|
220
|
-
#
|
|
221
|
-
#
|
|
253
|
+
# Pileup iterator over this file. Optional region can be specified.
|
|
254
|
+
# When a block is given, the iterator is managed RAII-style: it is closed automatically when the block ends.
|
|
255
|
+
# Without a block, returns an Enumerator over a live Pileup instance; caller should close when done.
|
|
256
|
+
#
|
|
257
|
+
# @param region [String, nil] region string like "chr1:100-200"
|
|
258
|
+
# @param beg [Integer, nil]
|
|
259
|
+
# @param end_ [Integer, nil]
|
|
260
|
+
# @param maxcnt [Integer, nil] cap on depth per position
|
|
261
|
+
def pileup(region = nil, beg = nil, end_: nil, maxcnt: nil, &block)
|
|
262
|
+
check_closed
|
|
263
|
+
if block_given?
|
|
264
|
+
Pileup.open(self, region:, beg:, end_: end_, maxcnt: maxcnt) do |piter|
|
|
265
|
+
piter.each(&block)
|
|
266
|
+
end
|
|
267
|
+
self
|
|
268
|
+
else
|
|
269
|
+
piter = Pileup.new(self, region:, beg:, end_: end_, maxcnt: maxcnt)
|
|
270
|
+
piter.to_enum(:each)
|
|
271
|
+
end
|
|
272
|
+
end
|
|
222
273
|
|
|
223
274
|
private
|
|
224
275
|
|
|
@@ -238,6 +289,17 @@ module HTS
|
|
|
238
289
|
end
|
|
239
290
|
end
|
|
240
291
|
|
|
292
|
+
# Multi-region query implementation
|
|
293
|
+
def query_regions(regions, copy: false, &block)
|
|
294
|
+
if copy
|
|
295
|
+
query_regions_copy(regions, &block)
|
|
296
|
+
else
|
|
297
|
+
query_regions_reuse(regions, &block)
|
|
298
|
+
end
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
# Internal: yield a single reused Record over the entire file.
|
|
302
|
+
# The underlying bam1_t is mutated on each iteration for speed.
|
|
241
303
|
def each_record_reuse
|
|
242
304
|
check_closed
|
|
243
305
|
# Each does not always start at the beginning of the file.
|
|
@@ -250,6 +312,7 @@ module HTS
|
|
|
250
312
|
self
|
|
251
313
|
end
|
|
252
314
|
|
|
315
|
+
# Internal: yield deep-copied Records so callers may retain them safely.
|
|
253
316
|
def each_record_copy
|
|
254
317
|
check_closed
|
|
255
318
|
return to_enum(__method__) unless block_given?
|
|
@@ -301,6 +364,7 @@ module HTS
|
|
|
301
364
|
self
|
|
302
365
|
end
|
|
303
366
|
|
|
367
|
+
# Internal: reused-Record iterator over a query iterator.
|
|
304
368
|
def query_reuse_yield(qiter)
|
|
305
369
|
bam1 = LibHTS.bam_init1
|
|
306
370
|
record = Record.new(header, bam1)
|
|
@@ -323,5 +387,27 @@ module HTS
|
|
|
323
387
|
ensure
|
|
324
388
|
LibHTS.hts_itr_destroy(qiter)
|
|
325
389
|
end
|
|
390
|
+
|
|
391
|
+
# Multi-region query using sequential single-region queries
|
|
392
|
+
# Note: This is a fallback implementation. Ideally we would use sam_itr_regarray
|
|
393
|
+
# but there seem to be issues with the multi-region iterator in the current setup.
|
|
394
|
+
def query_regions_reuse(regions, &block)
|
|
395
|
+
return to_enum(__method__, regions) unless block_given?
|
|
396
|
+
|
|
397
|
+
regions.each do |region|
|
|
398
|
+
querys_reuse(region, &block)
|
|
399
|
+
end
|
|
400
|
+
self
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
# Multi-region query with copied Records using sequential queries
|
|
404
|
+
def query_regions_copy(regions, &block)
|
|
405
|
+
return to_enum(__method__, regions) unless block_given?
|
|
406
|
+
|
|
407
|
+
regions.each do |region|
|
|
408
|
+
querys_copy(region, &block)
|
|
409
|
+
end
|
|
410
|
+
self
|
|
411
|
+
end
|
|
326
412
|
end
|
|
327
413
|
end
|
data/lib/hts/bcf.rb
CHANGED
|
@@ -215,13 +215,20 @@ module HTS
|
|
|
215
215
|
raise "query is only available for BCF files" unless file_format == "bcf"
|
|
216
216
|
raise "Index file is required to call the query method." unless index_loaded?
|
|
217
217
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
218
|
+
case region
|
|
219
|
+
when Array
|
|
220
|
+
raise ArgumentError, "beg and end must not be specified when region is an Array" unless beg.nil? && end_.nil?
|
|
221
|
+
|
|
222
|
+
query_regions(region, copy:, &block)
|
|
223
223
|
else
|
|
224
|
-
|
|
224
|
+
if beg && end_
|
|
225
|
+
tid = header.name2id(region)
|
|
226
|
+
queryi(tid, beg, end_, copy:, &block)
|
|
227
|
+
elsif beg.nil? && end_.nil?
|
|
228
|
+
querys(region, copy:, &block)
|
|
229
|
+
else
|
|
230
|
+
raise ArgumentError, "beg and end must be specified together"
|
|
231
|
+
end
|
|
225
232
|
end
|
|
226
233
|
end
|
|
227
234
|
|
|
@@ -243,6 +250,14 @@ module HTS
|
|
|
243
250
|
end
|
|
244
251
|
end
|
|
245
252
|
|
|
253
|
+
def query_regions(regions, copy: false, &block)
|
|
254
|
+
if copy
|
|
255
|
+
query_regions_copy(regions, &block)
|
|
256
|
+
else
|
|
257
|
+
query_regions_reuse(regions, &block)
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
|
|
246
261
|
def queryi_reuse(tid, beg, end_, &block)
|
|
247
262
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
248
263
|
|
|
@@ -263,6 +278,15 @@ module HTS
|
|
|
263
278
|
self
|
|
264
279
|
end
|
|
265
280
|
|
|
281
|
+
def query_regions_reuse(regions, &block)
|
|
282
|
+
return to_enum(__method__, regions) unless block_given?
|
|
283
|
+
|
|
284
|
+
regions.each do |region|
|
|
285
|
+
querys_reuse(region, &block)
|
|
286
|
+
end
|
|
287
|
+
self
|
|
288
|
+
end
|
|
289
|
+
|
|
266
290
|
def query_reuse_yield(qiter)
|
|
267
291
|
bcf1 = LibHTS.bcf_init
|
|
268
292
|
record = Record.new(header, bcf1)
|
|
@@ -299,6 +323,15 @@ module HTS
|
|
|
299
323
|
self
|
|
300
324
|
end
|
|
301
325
|
|
|
326
|
+
def query_regions_copy(regions, &block)
|
|
327
|
+
return to_enum(__method__, regions) unless block_given?
|
|
328
|
+
|
|
329
|
+
regions.each do |region|
|
|
330
|
+
querys_copy(region, &block)
|
|
331
|
+
end
|
|
332
|
+
self
|
|
333
|
+
end
|
|
334
|
+
|
|
302
335
|
def query_copy_yield(qiter)
|
|
303
336
|
loop do
|
|
304
337
|
bcf1 = LibHTS.bcf_init
|
data/lib/hts/hts.rb
CHANGED
data/lib/hts/libhts/constants.rb
CHANGED
|
@@ -4,7 +4,6 @@ module HTS
|
|
|
4
4
|
# Module for working with C HTSlib.
|
|
5
5
|
module LibHTS
|
|
6
6
|
typedef :int64, :hts_pos_t
|
|
7
|
-
typedef :pointer, :bam_plp_auto_f
|
|
8
7
|
|
|
9
8
|
# kstring
|
|
10
9
|
|
|
@@ -352,6 +351,36 @@ module HTS
|
|
|
352
351
|
end
|
|
353
352
|
end
|
|
354
353
|
|
|
354
|
+
# Internal: Non-owning view of bam1_t used when the pointer is managed by HTSlib
|
|
355
|
+
# (e.g., pileup/mpileup). This struct mirrors the layout of bam1_t and MUST NOT
|
|
356
|
+
# free memory on GC. Do not expose publicly; use only for read-only access.
|
|
357
|
+
class Bam1View < FFI::Struct
|
|
358
|
+
layout \
|
|
359
|
+
:core, Bam1Core,
|
|
360
|
+
:id, :uint64,
|
|
361
|
+
:data, :pointer, # uint8_t
|
|
362
|
+
:l_data, :int,
|
|
363
|
+
:m_data, :uint32,
|
|
364
|
+
:_mempolicy, :uint32 # bit_fields
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
# Base modification structure
|
|
368
|
+
class HtsBaseMod < FFI::Struct
|
|
369
|
+
layout \
|
|
370
|
+
:modified_base, :int,
|
|
371
|
+
:canonical_base, :int,
|
|
372
|
+
:strand, :int,
|
|
373
|
+
:qual, :int
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
# Base modification state (opaque pointer)
|
|
377
|
+
# Use AutoPointer since the structure is opaque and we only need custom release.
|
|
378
|
+
class HtsBaseModState < FFI::AutoPointer
|
|
379
|
+
def self.release(ptr)
|
|
380
|
+
LibHTS.hts_base_mod_state_free(ptr) unless ptr.null?
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
|
|
355
384
|
typedef :pointer, :bam_plp
|
|
356
385
|
typedef :pointer, :bam_mplp
|
|
357
386
|
|
|
@@ -364,7 +393,7 @@ module HTS
|
|
|
364
393
|
|
|
365
394
|
class BamPileup1 < FFI::BitStruct
|
|
366
395
|
layout \
|
|
367
|
-
:b,
|
|
396
|
+
:b, :pointer,
|
|
368
397
|
:qpos, :int32,
|
|
369
398
|
:indel, :int,
|
|
370
399
|
:level, :int,
|
data/lib/hts/libhts/sam.rb
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
module HTS
|
|
4
4
|
module LibHTS
|
|
5
|
+
# Callback type for bam_plp_auto_f: int (*)(void *data, bam1_t *b)
|
|
6
|
+
# Use raw pointer for bam1_t to avoid creating ManagedStruct wrappers (which would double-free)
|
|
7
|
+
callback :bam_plp_auto_f, %i[pointer pointer], :int
|
|
8
|
+
# callback :bam_plp_auto_f, [:pointer, Bam1.by_ref], :int
|
|
9
|
+
|
|
5
10
|
# Generates a new unpopulated header structure.
|
|
6
11
|
attach_function \
|
|
7
12
|
:sam_hdr_init,
|
|
@@ -414,24 +419,24 @@ module HTS
|
|
|
414
419
|
|
|
415
420
|
attach_function \
|
|
416
421
|
:sam_parse1,
|
|
417
|
-
[KString, SamHdr, Bam1]
|
|
422
|
+
[KString, SamHdr, :pointer], # [KString, SamHdr, (Bam1 | Bam1View)]
|
|
418
423
|
:int
|
|
419
424
|
|
|
420
425
|
attach_function \
|
|
421
426
|
:sam_format1,
|
|
422
|
-
[SamHdr, Bam1, KString]
|
|
427
|
+
[SamHdr, :pointer, KString], # [SamHdr, (Bam1 | Bam1View), KString]
|
|
423
428
|
:int
|
|
424
429
|
|
|
425
430
|
# Read a record from a file
|
|
426
431
|
attach_function \
|
|
427
432
|
:sam_read1,
|
|
428
|
-
[HtsFile, SamHdr, Bam1]
|
|
433
|
+
[HtsFile, SamHdr, :pointer], # [HtsFile, SamHdr, (Bam1 | Bam1View)]
|
|
429
434
|
:int
|
|
430
435
|
|
|
431
436
|
# Write a record to a file
|
|
432
437
|
attach_function \
|
|
433
438
|
:sam_write1,
|
|
434
|
-
[HtsFile, SamHdr, Bam1]
|
|
439
|
+
[HtsFile, SamHdr, :pointer], # [HtsFile, SamHdr, (Bam1 | Bam1View)]
|
|
435
440
|
:int
|
|
436
441
|
|
|
437
442
|
# Checks whether a record passes an hts_filter.
|
|
@@ -555,28 +560,28 @@ module HTS
|
|
|
555
560
|
|
|
556
561
|
attach_function \
|
|
557
562
|
:bam_plp_push,
|
|
558
|
-
[:bam_plp, Bam1],
|
|
563
|
+
[:bam_plp, Bam1.by_ref],
|
|
559
564
|
:int
|
|
560
565
|
|
|
561
566
|
attach_function \
|
|
562
567
|
:bam_plp_next,
|
|
563
568
|
%i[bam_plp pointer pointer pointer],
|
|
564
|
-
BamPileup1.by_ref
|
|
569
|
+
:pointer # BamPileup1.by_ref
|
|
565
570
|
|
|
566
571
|
attach_function \
|
|
567
572
|
:bam_plp_auto,
|
|
568
573
|
%i[bam_plp pointer pointer pointer],
|
|
569
|
-
BamPileup1.by_ref
|
|
574
|
+
:pointer # BamPileup1.by_ref
|
|
570
575
|
|
|
571
576
|
attach_function \
|
|
572
577
|
:bam_plp64_next,
|
|
573
578
|
%i[bam_plp pointer pointer pointer],
|
|
574
|
-
BamPileup1.by_ref
|
|
579
|
+
:pointer # BamPileup1.by_ref
|
|
575
580
|
|
|
576
581
|
attach_function \
|
|
577
582
|
:bam_plp64_auto,
|
|
578
583
|
%i[bam_plp pointer pointer pointer],
|
|
579
|
-
BamPileup1.by_ref
|
|
584
|
+
:pointer # BamPileup1.by_ref
|
|
580
585
|
|
|
581
586
|
attach_function \
|
|
582
587
|
:bam_plp_set_maxcnt,
|
|
@@ -588,7 +593,9 @@ module HTS
|
|
|
588
593
|
[:bam_plp],
|
|
589
594
|
:void
|
|
590
595
|
|
|
591
|
-
|
|
596
|
+
# Callback type for constructor/destructor: int (*)(void *data, const bam1_t *b, bam_pileup_cd *cd)
|
|
597
|
+
callback :bam_plp_callback_function, [:pointer, :pointer, BamPileupCd.by_ref], :int
|
|
598
|
+
# callback :bam_plp_callback_function, [:pointer, Bam1.by_ref, BamPileupCd.by_ref], :int
|
|
592
599
|
|
|
593
600
|
# sets a callback to initialise any per-pileup1_t fields.
|
|
594
601
|
attach_function \
|
|
@@ -602,17 +609,21 @@ module HTS
|
|
|
602
609
|
:void
|
|
603
610
|
|
|
604
611
|
# Get pileup padded insertion sequence
|
|
612
|
+
# Make pointer passing explicit by using by_ref for structs
|
|
605
613
|
attach_function \
|
|
606
614
|
:bam_plp_insertion,
|
|
607
|
-
[BamPileup1, KString, :pointer],
|
|
615
|
+
[BamPileup1.by_ref, KString.by_ref, :pointer],
|
|
608
616
|
:int
|
|
609
617
|
|
|
610
618
|
# Get pileup padded insertion sequence, including base modifications
|
|
611
619
|
attach_function \
|
|
612
620
|
:bam_plp_insertion_mod,
|
|
613
|
-
[BamPileup1,
|
|
621
|
+
[BamPileup1.by_ref, HtsBaseModState, KString.by_ref, :pointer],
|
|
614
622
|
:int
|
|
615
623
|
|
|
624
|
+
# NOTE: There is no bam_plp_init_overlaps in HTSlib (only bam_mplp_init_overlaps exists).
|
|
625
|
+
# The incorrect binding is removed to avoid undefined symbol errors.
|
|
626
|
+
|
|
616
627
|
attach_function \
|
|
617
628
|
:bam_mplp_init,
|
|
618
629
|
%i[int bam_plp_auto_f pointer],
|
|
@@ -672,61 +683,61 @@ module HTS
|
|
|
672
683
|
attach_function \
|
|
673
684
|
:hts_base_mod_state_alloc,
|
|
674
685
|
[],
|
|
675
|
-
|
|
686
|
+
HtsBaseModState
|
|
676
687
|
|
|
677
688
|
# Destroys an hts_base_mod_state.
|
|
678
689
|
attach_function \
|
|
679
690
|
:hts_base_mod_state_free,
|
|
680
|
-
[
|
|
691
|
+
[HtsBaseModState],
|
|
681
692
|
:void
|
|
682
693
|
|
|
683
694
|
# Parses the MM and ML tags out of a bam record.
|
|
684
695
|
attach_function \
|
|
685
696
|
:bam_parse_basemod,
|
|
686
|
-
[Bam1,
|
|
697
|
+
[Bam1, HtsBaseModState],
|
|
687
698
|
:int
|
|
688
699
|
|
|
689
700
|
# Parses the MM and ML tags out of a bam record, with an additional flags argument.
|
|
690
701
|
attach_function \
|
|
691
702
|
:bam_parse_basemod2,
|
|
692
|
-
[Bam1,
|
|
703
|
+
[Bam1, HtsBaseModState, :uint32],
|
|
693
704
|
:int
|
|
694
705
|
|
|
695
706
|
# Returns modification status for the next base position in the query seq.
|
|
696
707
|
attach_function \
|
|
697
708
|
:bam_mods_at_next_pos,
|
|
698
|
-
[Bam1,
|
|
709
|
+
[Bam1, HtsBaseModState, :pointer, :int],
|
|
699
710
|
:int
|
|
700
711
|
|
|
701
712
|
# Finds the next location containing base modifications and returns them
|
|
702
713
|
attach_function \
|
|
703
714
|
:bam_next_basemod,
|
|
704
|
-
[Bam1,
|
|
715
|
+
[Bam1, HtsBaseModState, :pointer, :int, :pointer],
|
|
705
716
|
:int
|
|
706
717
|
|
|
707
718
|
# Returns modification status for a specific query position.
|
|
708
719
|
attach_function \
|
|
709
720
|
:bam_mods_at_qpos,
|
|
710
|
-
[Bam1, :int,
|
|
721
|
+
[Bam1, :int, HtsBaseModState, :pointer, :int],
|
|
711
722
|
:int
|
|
712
723
|
|
|
713
724
|
# Returns data about a specific modification type for the alignment record.
|
|
714
725
|
attach_function \
|
|
715
726
|
:bam_mods_query_type,
|
|
716
|
-
|
|
727
|
+
[HtsBaseModState, :int, :pointer, :pointer, :pointer],
|
|
717
728
|
:int
|
|
718
729
|
|
|
719
730
|
# Returns data about the i^th modification type for the alignment record.
|
|
720
731
|
attach_function \
|
|
721
732
|
:bam_mods_queryi,
|
|
722
|
-
|
|
733
|
+
[HtsBaseModState, :int, :pointer, :pointer, :pointer],
|
|
723
734
|
:int
|
|
724
735
|
|
|
725
736
|
# Returns the list of base modification codes provided for this alignment record.
|
|
726
737
|
attach_function \
|
|
727
738
|
:bam_mods_recorded,
|
|
728
|
-
|
|
729
|
-
:
|
|
739
|
+
[HtsBaseModState, :pointer],
|
|
740
|
+
:pointer
|
|
730
741
|
end
|
|
731
742
|
end
|
|
732
743
|
|
data/lib/hts/libhts.rb
CHANGED
data/lib/hts/tabix.rb
CHANGED
|
@@ -44,8 +44,27 @@ module HTS
|
|
|
44
44
|
@idx = load_index(index)
|
|
45
45
|
end
|
|
46
46
|
|
|
47
|
-
def build_index
|
|
48
|
-
|
|
47
|
+
def build_index(index_name = nil, min_shift: 0)
|
|
48
|
+
check_closed
|
|
49
|
+
|
|
50
|
+
if index_name
|
|
51
|
+
warn "Create index for #{@file_name} to #{index_name}"
|
|
52
|
+
case LibHTS.tbx_index_build2(@file_name, index_name, min_shift, LibHTS.tbx_conf_vcf)
|
|
53
|
+
when 0 # successful
|
|
54
|
+
when -1 then raise "general failure"
|
|
55
|
+
when -2 then raise "compression not BGZF"
|
|
56
|
+
else raise "unknown error"
|
|
57
|
+
end
|
|
58
|
+
else
|
|
59
|
+
warn "Create index for #{@file_name}"
|
|
60
|
+
case LibHTS.tbx_index_build(@file_name, min_shift, LibHTS.tbx_conf_vcf)
|
|
61
|
+
when 0 # successful
|
|
62
|
+
when -1 then raise "general failure"
|
|
63
|
+
when -2 then raise "compression not BGZF"
|
|
64
|
+
else raise "unknown error"
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
self # for method chaining
|
|
49
68
|
end
|
|
50
69
|
|
|
51
70
|
def load_index(index_name = nil)
|
data/lib/hts/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: htslib
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
@@ -62,10 +62,13 @@ files:
|
|
|
62
62
|
- TUTORIAL.md
|
|
63
63
|
- lib/hts/bam.rb
|
|
64
64
|
- lib/hts/bam/auxi.rb
|
|
65
|
+
- lib/hts/bam/base_mod.rb
|
|
65
66
|
- lib/hts/bam/cigar.rb
|
|
66
67
|
- lib/hts/bam/flag.rb
|
|
67
68
|
- lib/hts/bam/header.rb
|
|
68
69
|
- lib/hts/bam/header_record.rb
|
|
70
|
+
- lib/hts/bam/mpileup.rb
|
|
71
|
+
- lib/hts/bam/pileup.rb
|
|
69
72
|
- lib/hts/bam/record.rb
|
|
70
73
|
- lib/hts/bcf.rb
|
|
71
74
|
- lib/hts/bcf/format.rb
|
|
@@ -116,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
116
119
|
- !ruby/object:Gem::Version
|
|
117
120
|
version: '0'
|
|
118
121
|
requirements: []
|
|
119
|
-
rubygems_version: 3.
|
|
122
|
+
rubygems_version: 3.6.9
|
|
120
123
|
specification_version: 4
|
|
121
124
|
summary: HTSlib bindings for Ruby
|
|
122
125
|
test_files: []
|