htslib 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/hts/bam/base_mod.rb +343 -0
- data/lib/hts/bam/header.rb +17 -0
- data/lib/hts/bam/mpileup.rb +175 -0
- data/lib/hts/bam/pileup.rb +201 -0
- data/lib/hts/bam/record.rb +13 -1
- data/lib/hts/bam.rb +114 -11
- data/lib/hts/bcf.rb +42 -9
- data/lib/hts/faidx.rb +1 -1
- data/lib/hts/hts.rb +1 -1
- data/lib/hts/libhts/constants.rb +39 -6
- data/lib/hts/libhts/sam.rb +34 -23
- data/lib/hts/libhts/sam_funcs.rb +1 -1
- data/lib/hts/libhts/vcf_funcs.rb +1 -1
- data/lib/hts/libhts.rb +6 -0
- data/lib/hts/tabix.rb +22 -3
- data/lib/hts/version.rb +1 -1
- metadata +6 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e1bf158506931c62ffae1a524158de9fbb451796a68a7586cf788203f67a8cc4
|
|
4
|
+
data.tar.gz: d3289551dac8783cfa23f1f8d44e1b4be44b6dab3d6369f816491ceea653188f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d1c316a599c2dc08e6589f980e9dd4ae6d8d6bababbfef424f35e5c25453b667bdc48570578e88a9eba142553fffb25b49ae4de54a061f99f7cbf286988ec618
|
|
7
|
+
data.tar.gz: 8529c6a02c354419dd722abc0bd6f27ec514ffe980f4b0d7014914de11551a45c4e96b803bc77255c3b423129a8743a3c39556d9d3d44ec6c6692556485379cf
|
data/README.md
CHANGED
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
[DOI](https://zenodo.org/badge/latestdoi/247078205)
|
|
7
7
|
[Docs](https://rubydoc.info/gems/htslib)
|
|
8
8
|
|
|
9
|
+
[DeepWiki](https://deepwiki.com/kojix2/ruby-htslib)
|
|
10
|
+
[Tokei](https://tokei.kojix2.net/github/kojix2/ruby-htslib)
|
|
11
|
+
|
|
9
12
|
Ruby-htslib is the [Ruby](https://www.ruby-lang.org) bindings to [HTSlib](https://github.com/samtools/htslib), a C library for high-throughput sequencing data formats. It allows you to read and write file formats commonly used in genomics, such as [SAM, BAM, VCF, and BCF](http://samtools.github.io/hts-specs/), in the Ruby language.
|
|
10
13
|
|
|
11
14
|
:apple: Feel free to fork it!
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# Base modification information from MM/ML tags
|
|
6
|
+
#
|
|
7
|
+
# This class provides access to DNA/RNA base modifications such as methylation.
|
|
8
|
+
# It wraps the htslib base modification API and provides a Ruby-friendly interface.
|
|
9
|
+
#
|
|
10
|
+
# @note BaseMod is a view object that references data in a Record.
|
|
11
|
+
# The state is maintained in hts_base_mod_state structure.
|
|
12
|
+
class BaseMod
|
|
13
|
+
include Enumerable
|
|
14
|
+
|
|
15
|
+
class NotParsedError < StandardError; end
|
|
16
|
+
|
|
17
|
+
attr_reader :record
|
|
18
|
+
|
|
19
|
+
# Individual base modification information
|
|
20
|
+
class Modification
|
|
21
|
+
attr_reader :modified_base, :canonical_base, :strand, :qual
|
|
22
|
+
|
|
23
|
+
# @param modified_base [Integer] Modification code as char or -ChEBI
|
|
24
|
+
# @param canonical_base [Integer] Canonical base (A, C, G, T, N)
|
|
25
|
+
# @param strand [Integer] 0 or 1 for +/- strand
|
|
26
|
+
# @param qual [Integer] Quality (256*probability) or -1 if unknown
|
|
27
|
+
def initialize(modified_base:, canonical_base:, strand:, qual:)
|
|
28
|
+
@modified_base = modified_base
|
|
29
|
+
@canonical_base = canonical_base
|
|
30
|
+
@strand = strand
|
|
31
|
+
@qual = qual
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Get modification code as character or ChEBI number as string
|
|
35
|
+
# @return [String] Single character code, or the negated ChEBI number as a string (e.g. "-27551")
|
|
36
|
+
def code
|
|
37
|
+
@modified_base > 0 ? @modified_base.chr : @modified_base.to_s
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Get canonical base as character
|
|
41
|
+
# @return [String] Single character (A, C, G, T, N)
|
|
42
|
+
def canonical
|
|
43
|
+
@canonical_base.chr
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Get likelihood as a probability (0.0-1.0)
|
|
47
|
+
# @return [Float, nil] Probability or nil if qual is -1
|
|
48
|
+
def probability
|
|
49
|
+
return nil if @qual == -1
|
|
50
|
+
|
|
51
|
+
@qual / 256.0
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Convert to hash representation
|
|
55
|
+
# @return [Hash] Hash with modification information
|
|
56
|
+
def to_h
|
|
57
|
+
{
|
|
58
|
+
modified_base: @modified_base,
|
|
59
|
+
code: code,
|
|
60
|
+
canonical_base: @canonical_base,
|
|
61
|
+
canonical: canonical,
|
|
62
|
+
strand: @strand,
|
|
63
|
+
qual: @qual,
|
|
64
|
+
probability: probability
|
|
65
|
+
}
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# String representation
|
|
69
|
+
# @return [String] String representation of the modification
|
|
70
|
+
def to_s
|
|
71
|
+
if @qual >= 0
|
|
72
|
+
"#{canonical}->#{code}(#{probability.round(3)})"
|
|
73
|
+
else
|
|
74
|
+
"#{canonical}->#{code}"
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Inspect string
|
|
79
|
+
# @return [String] Inspect string
|
|
80
|
+
def inspect
|
|
81
|
+
"#<HTS::Bam::BaseMod::Modification #{self}>"
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Position-specific modification information
|
|
86
|
+
class Position
|
|
87
|
+
attr_reader :position, :modifications
|
|
88
|
+
|
|
89
|
+
# @param position [Integer] Position in query sequence
|
|
90
|
+
# @param modifications [Array<Modification>] Array of modifications at this position
|
|
91
|
+
def initialize(position, modifications)
|
|
92
|
+
@position = position
|
|
93
|
+
@modifications = modifications
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Check if this position has methylation
|
|
97
|
+
# @return [Boolean] true if any modification is methylation ('m')
|
|
98
|
+
def methylated?
|
|
99
|
+
@modifications.any? { |m| m.code == "m" }
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Check if this position has hydroxymethylation
|
|
103
|
+
# @return [Boolean] true if any modification is hydroxymethylation ('h')
|
|
104
|
+
def hydroxymethylated?
|
|
105
|
+
@modifications.any? { |m| m.code == "h" }
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Convert to hash representation
|
|
109
|
+
# @return [Hash] Hash with position information
|
|
110
|
+
def to_h
|
|
111
|
+
{
|
|
112
|
+
position: @position,
|
|
113
|
+
modifications: @modifications.map(&:to_h)
|
|
114
|
+
}
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# String representation
|
|
118
|
+
# @return [String] String representation
|
|
119
|
+
def to_s
|
|
120
|
+
mods_str = @modifications.map(&:to_s).join(", ")
|
|
121
|
+
"pos=#{@position} [#{mods_str}]"
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Inspect string
|
|
125
|
+
# @return [String] Inspect string
|
|
126
|
+
def inspect
|
|
127
|
+
"#<HTS::Bam::BaseMod::Position #{self}>"
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# Initialize a new BaseMod object
|
|
132
|
+
# @param record [Record] The BAM record to extract modifications from
|
|
133
|
+
# @param auto_parse [Boolean] If true, parse MM/ML lazily on first access; if false, accessors raise NotParsedError until #parse is called
|
|
134
|
+
def initialize(record, auto_parse: true)
|
|
135
|
+
@record = record
|
|
136
|
+
@state = LibHTS.hts_base_mod_state_alloc
|
|
137
|
+
@closed = false
|
|
138
|
+
@auto_parse = !!auto_parse
|
|
139
|
+
@parsed = false
|
|
140
|
+
raise Error, "Failed to allocate hts_base_mod_state" if @state.null?
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Explicitly free the state
|
|
144
|
+
# @return [void]
|
|
145
|
+
def close
|
|
146
|
+
return if @closed
|
|
147
|
+
|
|
148
|
+
# With HtsBaseModState as an AutoPointer, releasing the Ruby object
|
|
149
|
+
# is sufficient. Avoid manual free to prevent double-free.
|
|
150
|
+
@state = nil
|
|
151
|
+
@closed = true
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Whether this object has parsed MM/ML tags already
|
|
155
|
+
# @return [Boolean]
|
|
156
|
+
def parsed?
|
|
157
|
+
@parsed
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Ensure MM/ML have been parsed, performing lazy parse if enabled.
|
|
161
|
+
# @param flags [Integer]
|
|
162
|
+
# @return [void]
|
|
163
|
+
def ensure_parsed!(flags = 0)
|
|
164
|
+
return if @parsed
|
|
165
|
+
|
|
166
|
+
raise NotParsedError, "BaseMod is not parsed. Call #parse first (auto_parse is disabled)." unless @auto_parse
|
|
167
|
+
|
|
168
|
+
parse(flags)
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Parse MM and ML tags from the record
|
|
172
|
+
# @param flags [Integer] Parsing flags (default: 0)
|
|
173
|
+
# @return [Integer] Non-negative status from bam_parse_basemod2 (htslib documents 0 on success); failures raise instead of returning -1
|
|
174
|
+
# @raise [Error] If parsing fails
|
|
175
|
+
def parse(flags = 0)
|
|
176
|
+
ret = LibHTS.bam_parse_basemod2(@record.struct, @state, flags)
|
|
177
|
+
raise Error, "Failed to parse base modifications" if ret < 0
|
|
178
|
+
|
|
179
|
+
@parsed = true
|
|
180
|
+
ret
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Get modification information at a specific query position
|
|
184
|
+
# @param position [Integer] Query position (0-based)
|
|
185
|
+
# @param max_mods [Integer] Maximum number of modifications to retrieve
|
|
186
|
+
# @return [Position, nil] Position object with modifications, or nil if none
|
|
187
|
+
def at_pos(position, max_mods: 10)
|
|
188
|
+
# Reset state to ensure deterministic results even after prior iteration
|
|
189
|
+
parsed? ? parse : ensure_parsed!
|
|
190
|
+
|
|
191
|
+
mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)
|
|
192
|
+
|
|
193
|
+
ret = LibHTS.bam_mods_at_qpos(@record.struct, position, @state,
|
|
194
|
+
mods_ptr, max_mods)
|
|
195
|
+
return nil if ret <= 0
|
|
196
|
+
|
|
197
|
+
build_position(position, mods_ptr, [ret, max_mods].min)
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Array-style access to modifications at a position
|
|
201
|
+
# @param position [Integer] Query position (0-based)
|
|
202
|
+
# @return [Position, nil] Position object with modifications, or nil if none
|
|
203
|
+
def [](position)
|
|
204
|
+
at_pos(position)
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
# Iterate over all positions with modifications
|
|
208
|
+
# @param max_mods [Integer] Maximum number of modifications per position
|
|
209
|
+
# @yield [Position] Position object for each modified position
|
|
210
|
+
# @return [Enumerator] If no block given
|
|
211
|
+
def each_position(max_mods: 10)
|
|
212
|
+
return enum_for(__method__, max_mods: max_mods) unless block_given?
|
|
213
|
+
|
|
214
|
+
# Reset state at the start of iteration to allow repeated enumerations
|
|
215
|
+
parsed? ? parse : ensure_parsed!
|
|
216
|
+
|
|
217
|
+
pos_ptr = FFI::MemoryPointer.new(:int)
|
|
218
|
+
mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)
|
|
219
|
+
|
|
220
|
+
loop do
|
|
221
|
+
ret = LibHTS.bam_next_basemod(@record.struct, @state,
|
|
222
|
+
mods_ptr, max_mods, pos_ptr)
|
|
223
|
+
break if ret <= 0
|
|
224
|
+
|
|
225
|
+
position = pos_ptr.read_int
|
|
226
|
+
yield build_position(position, mods_ptr, [ret, max_mods].min)
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
alias each each_position
|
|
231
|
+
|
|
232
|
+
# Get list of modification types present in this record
|
|
233
|
+
# @return [Array<Integer>] Array of modification codes (char code or -ChEBI)
|
|
234
|
+
def modification_types
|
|
235
|
+
ensure_parsed!
|
|
236
|
+
|
|
237
|
+
ntype_ptr = FFI::MemoryPointer.new(:int)
|
|
238
|
+
codes_ptr = LibHTS.bam_mods_recorded(@state, ntype_ptr)
|
|
239
|
+
|
|
240
|
+
ntype = ntype_ptr.read_int
|
|
241
|
+
return [] if ntype <= 0 || codes_ptr.null?
|
|
242
|
+
|
|
243
|
+
codes_ptr.read_array_of_int(ntype)
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
alias recorded_types modification_types
|
|
247
|
+
|
|
248
|
+
# Query information about a specific modification type by code
|
|
249
|
+
# @param code [Integer, String] Modification code (char code or -ChEBI, or single char string)
|
|
250
|
+
# @return [Hash, nil] Hash with canonical, strand, implicit info, or nil if not found
|
|
251
|
+
def query_type(code)
|
|
252
|
+
ensure_parsed!
|
|
253
|
+
|
|
254
|
+
code = code.ord if code.is_a?(String)
|
|
255
|
+
|
|
256
|
+
strand_ptr = FFI::MemoryPointer.new(:int)
|
|
257
|
+
implicit_ptr = FFI::MemoryPointer.new(:int)
|
|
258
|
+
canonical_ptr = FFI::MemoryPointer.new(:char, 1)
|
|
259
|
+
|
|
260
|
+
ret = LibHTS.bam_mods_query_type(@state, code, strand_ptr,
|
|
261
|
+
implicit_ptr, canonical_ptr)
|
|
262
|
+
return nil if ret < 0
|
|
263
|
+
|
|
264
|
+
{
|
|
265
|
+
canonical: canonical_ptr.read_char.chr,
|
|
266
|
+
strand: strand_ptr.read_int,
|
|
267
|
+
implicit: implicit_ptr.read_int != 0
|
|
268
|
+
}
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Query information about i-th modification type
|
|
272
|
+
# @param index [Integer] Modification type index (0-based)
|
|
273
|
+
# @return [Hash, nil] Hash with code, canonical, strand, implicit info, or nil if the lookup fails
|
|
274
|
+
def query_type_at(index)
|
|
275
|
+
ensure_parsed!
|
|
276
|
+
|
|
277
|
+
strand_ptr = FFI::MemoryPointer.new(:int)
|
|
278
|
+
implicit_ptr = FFI::MemoryPointer.new(:int)
|
|
279
|
+
canonical_ptr = FFI::MemoryPointer.new(:char, 1)
|
|
280
|
+
|
|
281
|
+
ret = LibHTS.bam_mods_queryi(@state, index, strand_ptr,
|
|
282
|
+
implicit_ptr, canonical_ptr)
|
|
283
|
+
return nil if ret < 0
|
|
284
|
+
|
|
285
|
+
types = modification_types
|
|
286
|
+
{
|
|
287
|
+
code: types[index],
|
|
288
|
+
canonical: canonical_ptr.read_char.chr,
|
|
289
|
+
strand: strand_ptr.read_int,
|
|
290
|
+
implicit: implicit_ptr.read_int != 0
|
|
291
|
+
}
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
# Get all modifications as an array
|
|
295
|
+
# @return [Array<Position>] Array of all positions with modifications
|
|
296
|
+
def to_a
|
|
297
|
+
each_position.to_a
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
# String representation for debugging
|
|
301
|
+
# @return [String] String representation
|
|
302
|
+
def to_s
|
|
303
|
+
return "#<HTS::Bam::BaseMod (not parsed)>" unless @parsed
|
|
304
|
+
|
|
305
|
+
mods = []
|
|
306
|
+
each_position do |pos|
|
|
307
|
+
mods << pos.to_s
|
|
308
|
+
end
|
|
309
|
+
"#<HTS::Bam::BaseMod #{mods.join(' ')}>"
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
# Inspect string
|
|
313
|
+
# @return [String] Inspect string
|
|
314
|
+
def inspect
|
|
315
|
+
to_s
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
private
|
|
319
|
+
|
|
320
|
+
# Build Position object from hts_base_mod array
|
|
321
|
+
# @param position [Integer] Query position
|
|
322
|
+
# @param mods_ptr [FFI::Pointer] Pointer to array of HtsBaseMod structures
|
|
323
|
+
# @param n_mods [Integer] Number of modifications
|
|
324
|
+
# @return [Position] Position object
|
|
325
|
+
def build_position(position, mods_ptr, n_mods)
|
|
326
|
+
modifications = []
|
|
327
|
+
|
|
328
|
+
n_mods.times do |i|
|
|
329
|
+
mod_struct = LibHTS::HtsBaseMod.new(mods_ptr + i * LibHTS::HtsBaseMod.size)
|
|
330
|
+
|
|
331
|
+
modifications << Modification.new(
|
|
332
|
+
modified_base: mod_struct[:modified_base],
|
|
333
|
+
canonical_base: mod_struct[:canonical_base],
|
|
334
|
+
strand: mod_struct[:strand],
|
|
335
|
+
qual: mod_struct[:qual]
|
|
336
|
+
)
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
Position.new(position, modifications)
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
end
|
data/lib/hts/bam/header.rb
CHANGED
|
@@ -111,6 +111,23 @@ module HTS
|
|
|
111
111
|
name2tid(name)
|
|
112
112
|
end
|
|
113
113
|
|
|
114
|
+
# Add a @PG (program) line to the header
|
|
115
|
+
# @param program_name [String] Name of the program
|
|
116
|
+
# @param options [Hash] Key-value pairs for @PG tags (ID, PN, VN, CL, PP, etc.)
|
|
117
|
+
# @return [Integer] 0 on success, -1 on failure
|
|
118
|
+
#
|
|
119
|
+
# This is a convenience wrapper around sam_hdr_add_pg that automatically:
|
|
120
|
+
# - Generates a unique ID if the specified one clashes
|
|
121
|
+
# - Manages PP (previous program) chains automatically
|
|
122
|
+
#
|
|
123
|
+
# @example
|
|
124
|
+
# header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
|
|
125
|
+
# header.add_pg("samtools", VN: "1.15", PP: "bwa")
|
|
126
|
+
def add_pg(program_name, **options)
|
|
127
|
+
args = options.flat_map { |k, v| [:string, k.to_s, :string, v.to_s] }
|
|
128
|
+
LibHTS.sam_hdr_add_pg(@sam_hdr, program_name, *args, :pointer, FFI::Pointer::NULL)
|
|
129
|
+
end
|
|
130
|
+
|
|
114
131
|
private
|
|
115
132
|
|
|
116
133
|
def name2tid(name)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class Bam < Hts
|
|
5
|
+
# High-level mpileup iterator over multiple BAM/CRAM inputs
|
|
6
|
+
class Mpileup
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
# Usage:
|
|
10
|
+
# HTS::Bam::Mpileup.open([bam1, bam2], region: "chr1:1-100") do |mpl|
|
|
11
|
+
# mpl.each { |cols| ... }
|
|
12
|
+
# end
|
|
13
|
+
def self.open(*args, **kw)
|
|
14
|
+
m = new(*args, **kw)
|
|
15
|
+
return m unless block_given?
|
|
16
|
+
|
|
17
|
+
begin
|
|
18
|
+
yield m
|
|
19
|
+
ensure
|
|
20
|
+
m.close
|
|
21
|
+
end
|
|
22
|
+
m
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Normalize inputs to HTS::Bam instances
|
|
26
|
+
# Accepts an array of HTS::Bam instances or filenames (String); files opened here from filenames are closed by #close
|
|
27
|
+
def initialize(inputs, region: nil, beg: nil, end_: nil, maxcnt: nil, overlaps: false)
|
|
28
|
+
raise ArgumentError, "inputs must be non-empty" if inputs.nil? || inputs.empty?
|
|
29
|
+
|
|
30
|
+
@owned_bams = [] # Bams we opened here; will be closed on close
|
|
31
|
+
@bams = inputs.map do |x|
|
|
32
|
+
case x
|
|
33
|
+
when HTS::Bam
|
|
34
|
+
x
|
|
35
|
+
when String
|
|
36
|
+
b = HTS::Bam.open(x)
|
|
37
|
+
@owned_bams << b
|
|
38
|
+
b
|
|
39
|
+
else
|
|
40
|
+
raise ArgumentError, "Unsupported input type: #{x.class}"
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
n = @bams.length
|
|
45
|
+
@iters = []
|
|
46
|
+
@data_blocks = [] # per-input packed pointers kept alive
|
|
47
|
+
|
|
48
|
+
# Prepare optional region iterators for each input
|
|
49
|
+
@bams.each_with_index do |bam, i|
|
|
50
|
+
itr = nil
|
|
51
|
+
if region && beg.nil? && end_.nil?
|
|
52
|
+
raise "Index required for region mpileup" unless bam.index_loaded?
|
|
53
|
+
|
|
54
|
+
itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), bam.header.struct, region)
|
|
55
|
+
raise "Failed to query region on input ##{i}: #{region}" if itr.null?
|
|
56
|
+
elsif region && beg && end_
|
|
57
|
+
raise "Index required for region mpileup" unless bam.index_loaded?
|
|
58
|
+
|
|
59
|
+
tid = bam.header.get_tid(region)
|
|
60
|
+
itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
|
|
61
|
+
raise "Failed to query region on input ##{i}: #{region} #{beg} #{end_}" if itr.null?
|
|
62
|
+
elsif beg || end_
|
|
63
|
+
raise ArgumentError, "beg and end_ must be specified together"
|
|
64
|
+
end
|
|
65
|
+
@iters << itr
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Build per-input packed pointer blocks so C passes them back to the callback.
|
|
69
|
+
# Layout per input: [0] hts_fp (htsFile*), [1] hdr_struct (bam_hdr_t*), [2] itr (hts_itr_t* or NULL)
|
|
70
|
+
ptr_size = FFI.type_size(:pointer)
|
|
71
|
+
data_array = FFI::MemoryPointer.new(:pointer, n)
|
|
72
|
+
@bams.each_with_index do |bam, i|
|
|
73
|
+
hts_fp = bam.instance_variable_get(:@hts_file)
|
|
74
|
+
hdr_struct = bam.header.struct
|
|
75
|
+
itr = @iters[i]
|
|
76
|
+
block = FFI::MemoryPointer.new(:pointer, 3)
|
|
77
|
+
block.put_pointer(0 * ptr_size, hts_fp)
|
|
78
|
+
block.put_pointer(1 * ptr_size, hdr_struct)
|
|
79
|
+
block.put_pointer(2 * ptr_size, itr && !itr.null? ? itr : FFI::Pointer::NULL)
|
|
80
|
+
@data_blocks << block
|
|
81
|
+
data_array.put_pointer(i * ptr_size, block)
|
|
82
|
+
end
|
|
83
|
+
# Keep the array of per-input blocks alive while the C side holds on to them
|
|
84
|
+
@data_array = data_array
|
|
85
|
+
|
|
86
|
+
@cb = FFI::Function.new(:int, %i[pointer pointer]) do |data, b|
|
|
87
|
+
# Unpack pointers from the per-input block
|
|
88
|
+
hts_fp = data.get_pointer(0 * ptr_size)
|
|
89
|
+
hdr_struct = data.get_pointer(1 * ptr_size)
|
|
90
|
+
itr = data.get_pointer(2 * ptr_size)
|
|
91
|
+
# HTSlib contract: return same as sam_itr_next/sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
|
|
92
|
+
if itr && !itr.null?
|
|
93
|
+
HTS::LibHTS.sam_itr_next(hts_fp, itr, b)
|
|
94
|
+
else
|
|
95
|
+
HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
@iter = HTS::LibHTS.bam_mplp_init(n, @cb, @data_array)
|
|
100
|
+
raise "bam_mplp_init failed" if @iter.null?
|
|
101
|
+
|
|
102
|
+
HTS::LibHTS.bam_mplp_set_maxcnt(@iter, maxcnt) if maxcnt
|
|
103
|
+
return unless overlaps
|
|
104
|
+
|
|
105
|
+
rc = HTS::LibHTS.bam_mplp_init_overlaps(@iter)
|
|
106
|
+
raise "bam_mplp_init_overlaps failed" if rc < 0
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Yields an array of Pileup::PileupColumn (one per input) for each position
|
|
110
|
+
def each
|
|
111
|
+
return to_enum(__method__) unless block_given?
|
|
112
|
+
|
|
113
|
+
n = @bams.length
|
|
114
|
+
tid_ptr = FFI::MemoryPointer.new(:int)
|
|
115
|
+
pos_ptr = FFI::MemoryPointer.new(:long_long)
|
|
116
|
+
n_ptr = FFI::MemoryPointer.new(:int, n)
|
|
117
|
+
plp_ptr = FFI::MemoryPointer.new(:pointer, n)
|
|
118
|
+
plp1_size = HTS::LibHTS::BamPileup1.size
|
|
119
|
+
headers = @bams.map(&:header)
|
|
120
|
+
|
|
121
|
+
while HTS::LibHTS.bam_mplp64_auto(@iter, tid_ptr, pos_ptr, n_ptr, plp_ptr) > 0
|
|
122
|
+
tid = tid_ptr.read_int
|
|
123
|
+
pos = pos_ptr.read_long_long
|
|
124
|
+
|
|
125
|
+
counts = n_ptr.read_array_of_int(n)
|
|
126
|
+
plp_arr = plp_ptr.read_array_of_pointer(n)
|
|
127
|
+
|
|
128
|
+
cols = Array.new(n)
|
|
129
|
+
i = 0
|
|
130
|
+
while i < n
|
|
131
|
+
c = counts[i]
|
|
132
|
+
if c <= 0 || plp_arr[i].null?
|
|
133
|
+
cols[i] = HTS::Bam::Pileup::PileupColumn.new(tid: tid, pos: pos, alignments: [])
|
|
134
|
+
else
|
|
135
|
+
base_ptr = plp_arr[i]
|
|
136
|
+
aligns = Array.new(c)
|
|
137
|
+
j = 0
|
|
138
|
+
while j < c
|
|
139
|
+
e_ptr = base_ptr + (j * plp1_size)
|
|
140
|
+
entry = HTS::LibHTS::BamPileup1.new(e_ptr)
|
|
141
|
+
aligns[j] = HTS::Bam::Pileup::PileupRecord.new(entry, headers[i])
|
|
142
|
+
j += 1
|
|
143
|
+
end
|
|
144
|
+
cols[i] = HTS::Bam::Pileup::PileupColumn.new(tid: tid, pos: pos, alignments: aligns)
|
|
145
|
+
end
|
|
146
|
+
i += 1
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
yield cols
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
self
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def close
|
|
156
|
+
if @iter && !@iter.null?
|
|
157
|
+
HTS::LibHTS.bam_mplp_destroy(@iter)
|
|
158
|
+
@iter = FFI::Pointer::NULL
|
|
159
|
+
end
|
|
160
|
+
@iters.each do |itr|
|
|
161
|
+
HTS::LibHTS.hts_itr_destroy(itr) if itr && !itr.null?
|
|
162
|
+
end
|
|
163
|
+
@iters.clear
|
|
164
|
+
# Keep references to callback and data blocks to prevent GC
|
|
165
|
+
@_keepalive = [@cb, @data_array, *@data_blocks]
|
|
166
|
+
# Close owned bams opened by this object
|
|
167
|
+
@owned_bams.each do |b|
|
|
168
|
+
b.close
|
|
169
|
+
rescue StandardError
|
|
170
|
+
end
|
|
171
|
+
@owned_bams.clear
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|