htslib 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea68a4c331c9a404cfce2bf86fea386985e96ee3b76ae25e9d9b701593294880
4
- data.tar.gz: d826e59f66e20bc40bc47c2033295abe2b3aceab8cb9d5af3d4d41309e448732
3
+ metadata.gz: e1bf158506931c62ffae1a524158de9fbb451796a68a7586cf788203f67a8cc4
4
+ data.tar.gz: d3289551dac8783cfa23f1f8d44e1b4be44b6dab3d6369f816491ceea653188f
5
5
  SHA512:
6
- metadata.gz: 646dca4eb44c96a67020f57a090c26715b003521c9c6afffad1becf031576c334ea03c99b61f795a35932935535c53a1899a960ab986480cb3f5eef5b9913b96
7
- data.tar.gz: e1cc5d9357932e04cebae1aaa5a8dc7024e0755584ea21999b18004dba76726c10b23276ee6d98fc1580ffb5aad45aeb12fd3e1bf94cfc45fdee70aefda87f91
6
+ metadata.gz: d1c316a599c2dc08e6589f980e9dd4ae6d8d6bababbfef424f35e5c25453b667bdc48570578e88a9eba142553fffb25b49ae4de54a061f99f7cbf286988ec618
7
+ data.tar.gz: 8529c6a02c354419dd722abc0bd6f27ec514ffe980f4b0d7014914de11551a45c4e96b803bc77255c3b423129a8743a3c39556d9d3d44ec6c6692556485379cf
data/README.md CHANGED
@@ -165,8 +165,6 @@ Try Crystal. [HTS.cr](https://github.com/bio-cr/hts.cr) is implemented in Crysta
165
165
 
166
166
  ## Development
167
167
 
168
- ![Diagram](diagram.svg)
169
-
170
168
  #### Compile from source code
171
169
 
172
170
  [GNU Autotools](https://en.wikipedia.org/wiki/GNU_Autotools) is required to compile htslib.
@@ -0,0 +1,343 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTS
4
+ class Bam < Hts
5
# Base modification information from MM/ML tags
#
# This class provides access to DNA/RNA base modifications such as methylation.
# It wraps the htslib base modification API and provides a Ruby-friendly interface.
#
# @note BaseMod is a view object that references data in a Record.
#   The state is maintained in hts_base_mod_state structure. After {#close}
#   that state is dropped, so every method that needs it raises {ClosedError}
#   instead of handing a nil state to htslib.
class BaseMod
  include Enumerable

  # Raised when data is requested before parsing while auto_parse is disabled
  class NotParsedError < StandardError; end

  # Raised when the object is used after {#close}
  class ClosedError < StandardError; end

  attr_reader :record

  # Individual base modification information
  class Modification
    attr_reader :modified_base, :canonical_base, :strand, :qual

    # @param modified_base [Integer] Modification code as char or -ChEBI
    # @param canonical_base [Integer] Canonical base (A, C, G, T, N)
    # @param strand [Integer] 0 or 1 for +/- strand
    # @param qual [Integer] Quality (256*probability) or -1 if unknown
    def initialize(modified_base:, canonical_base:, strand:, qual:)
      @modified_base = modified_base
      @canonical_base = canonical_base
      @strand = strand
      @qual = qual
    end

    # Get modification code as a string
    # @return [String] Single character for positive codes; otherwise the
    #   integer rendered with Integer#to_s (so a -ChEBI value keeps its sign)
    def code
      @modified_base > 0 ? @modified_base.chr : @modified_base.to_s
    end

    # Get canonical base as character
    # @return [String] Single character (A, C, G, T, N)
    def canonical
      @canonical_base.chr
    end

    # Get likelihood as a probability (qual / 256.0)
    # @return [Float, nil] Probability, or nil when qual is negative (unknown).
    #   Any negative qual counts as unknown so that this agrees with #to_s.
    def probability
      return nil if @qual.negative?

      @qual / 256.0
    end

    # Convert to hash representation
    # @return [Hash] Hash with modification information
    def to_h
      {
        modified_base: @modified_base,
        code: code,
        canonical_base: @canonical_base,
        canonical: canonical,
        strand: @strand,
        qual: @qual,
        probability: probability
      }
    end

    # String representation
    # @return [String] "C->m(0.5)" when the quality is known, "C->m" otherwise
    def to_s
      prob = probability
      prob ? "#{canonical}->#{code}(#{prob.round(3)})" : "#{canonical}->#{code}"
    end

    # Inspect string
    # @return [String] Inspect string
    def inspect
      "#<HTS::Bam::BaseMod::Modification #{self}>"
    end
  end

  # Position-specific modification information
  class Position
    attr_reader :position, :modifications

    # @param position [Integer] Position in query sequence
    # @param modifications [Array<Modification>] Array of modifications at this position
    def initialize(position, modifications)
      @position = position
      @modifications = modifications
    end

    # Check if this position has methylation
    # @return [Boolean] true if any modification has code 'm'
    def methylated?
      @modifications.any? { |m| m.code == "m" }
    end

    # Check if this position has hydroxymethylation
    # @return [Boolean] true if any modification has code 'h'
    def hydroxymethylated?
      @modifications.any? { |m| m.code == "h" }
    end

    # Convert to hash representation
    # @return [Hash] Hash with position information
    def to_h
      {
        position: @position,
        modifications: @modifications.map(&:to_h)
      }
    end

    # String representation
    # @return [String] String representation
    def to_s
      "pos=#{@position} [#{@modifications.map(&:to_s).join(', ')}]"
    end

    # Inspect string
    # @return [String] Inspect string
    def inspect
      "#<HTS::Bam::BaseMod::Position #{self}>"
    end
  end

  # Initialize a new BaseMod object
  # @param record [Record] The BAM record to extract modifications from
  # @param auto_parse [Boolean] If true, parse MM/ML lazily on first access
  # @raise [Error] If the hts_base_mod_state cannot be allocated
  def initialize(record, auto_parse: true)
    @record = record
    @state = LibHTS.hts_base_mod_state_alloc
    @closed = false
    @auto_parse = !!auto_parse
    @parsed = false
    raise Error, "Failed to allocate hts_base_mod_state" if @state.null?
  end

  # Explicitly release the state. Calling it again is a no-op.
  # @return [void]
  def close
    return if @closed

    # With HtsBaseModState as an AutoPointer, releasing the Ruby object
    # is sufficient. Avoid manual free to prevent double-free.
    @state = nil
    @closed = true
  end

  # Whether #close has been called
  # @return [Boolean]
  def closed?
    @closed
  end

  # Whether this object has parsed MM/ML tags already
  # @return [Boolean]
  def parsed?
    @parsed
  end

  # Ensure MM/ML have been parsed, performing lazy parse if enabled.
  # @param flags [Integer]
  # @return [void]
  # @raise [ClosedError] If the object has been closed
  # @raise [NotParsedError] If nothing was parsed yet and auto_parse is disabled
  def ensure_parsed!(flags = 0)
    check_open!
    return if @parsed

    raise NotParsedError, "BaseMod is not parsed. Call #parse first (auto_parse is disabled)." unless @auto_parse

    parse(flags)
  end

  # Parse MM and ML tags from the record
  # @param flags [Integer] Parsing flags (default: 0)
  # @return [Integer] The non-negative value returned by bam_parse_basemod2
  # @raise [ClosedError] If the object has been closed
  # @raise [Error] If parsing fails (negative return value)
  def parse(flags = 0)
    check_open!
    ret = LibHTS.bam_parse_basemod2(@record.struct, @state, flags)
    raise Error, "Failed to parse base modifications" if ret < 0

    @parsed = true
    ret
  end

  # Get modification information at a specific query position
  # @param position [Integer] Query position (0-based)
  # @param max_mods [Integer] Maximum number of modifications to retrieve
  # @return [Position, nil] Position object with modifications, or nil if none
  def at_pos(position, max_mods: 10)
    # Reset state to ensure deterministic results even after prior iteration
    parsed? ? parse : ensure_parsed!

    mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)

    ret = LibHTS.bam_mods_at_qpos(@record.struct, position, @state,
                                  mods_ptr, max_mods)
    return nil if ret <= 0

    # ret may exceed the buffer capacity; only max_mods entries were written
    build_position(position, mods_ptr, [ret, max_mods].min)
  end

  # Array-style access to modifications at a position
  # @param position [Integer] Query position (0-based)
  # @return [Position, nil] Position object with modifications, or nil if none
  def [](position)
    at_pos(position)
  end

  # Iterate over all positions with modifications
  # @param max_mods [Integer] Maximum number of modifications per position
  # @yield [Position] Position object for each modified position
  # @return [Enumerator] If no block given
  def each_position(max_mods: 10)
    return enum_for(__method__, max_mods: max_mods) unless block_given?

    # Reset state at the start of iteration to allow repeated enumerations
    parsed? ? parse : ensure_parsed!

    pos_ptr = FFI::MemoryPointer.new(:int)
    mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)

    loop do
      ret = LibHTS.bam_next_basemod(@record.struct, @state,
                                    mods_ptr, max_mods, pos_ptr)
      break if ret <= 0

      position = pos_ptr.read_int
      yield build_position(position, mods_ptr, [ret, max_mods].min)
    end
  end

  alias each each_position

  # Get list of modification types present in this record
  # @return [Array<Integer>] Array of modification codes (char code or -ChEBI)
  def modification_types
    ensure_parsed!

    ntype_ptr = FFI::MemoryPointer.new(:int)
    codes_ptr = LibHTS.bam_mods_recorded(@state, ntype_ptr)

    ntype = ntype_ptr.read_int
    return [] if ntype <= 0 || codes_ptr.null?

    codes_ptr.read_array_of_int(ntype)
  end

  alias recorded_types modification_types

  # Query information about a specific modification type by code
  # @param code [Integer, String] Modification code (char code or -ChEBI, or single char string)
  # @return [Hash, nil] Hash with canonical, strand, implicit info, or nil if not found
  def query_type(code)
    ensure_parsed!

    # A String is converted with String#ord, i.e. only its first character is used
    code = code.ord if code.is_a?(String)

    strand_ptr = FFI::MemoryPointer.new(:int)
    implicit_ptr = FFI::MemoryPointer.new(:int)
    canonical_ptr = FFI::MemoryPointer.new(:char, 1)

    ret = LibHTS.bam_mods_query_type(@state, code, strand_ptr,
                                     implicit_ptr, canonical_ptr)
    return nil if ret < 0

    {
      canonical: canonical_ptr.read_char.chr,
      strand: strand_ptr.read_int,
      implicit: implicit_ptr.read_int != 0
    }
  end

  # Query information about i-th modification type
  # @param index [Integer] Modification type index (0-based)
  # @return [Hash, nil] Hash with code, canonical, strand, implicit info,
  #   or nil when bam_mods_queryi reports a negative status
  def query_type_at(index)
    ensure_parsed!

    strand_ptr = FFI::MemoryPointer.new(:int)
    implicit_ptr = FFI::MemoryPointer.new(:int)
    canonical_ptr = FFI::MemoryPointer.new(:char, 1)

    ret = LibHTS.bam_mods_queryi(@state, index, strand_ptr,
                                 implicit_ptr, canonical_ptr)
    return nil if ret < 0

    {
      code: modification_types[index],
      canonical: canonical_ptr.read_char.chr,
      strand: strand_ptr.read_int,
      implicit: implicit_ptr.read_int != 0
    }
  end

  # Get all modifications as an array
  # @return [Array<Position>] Array of all positions with modifications
  def to_a
    each_position.to_a
  end

  # String representation for debugging.
  # Never raises for a closed or unparsed object, so #inspect stays safe.
  # @return [String] String representation
  def to_s
    return "#<HTS::Bam::BaseMod (closed)>" if @closed
    return "#<HTS::Bam::BaseMod (not parsed)>" unless @parsed

    "#<HTS::Bam::BaseMod #{each_position.map(&:to_s).join(' ')}>"
  end

  # Inspect string
  # @return [String] Inspect string
  def inspect
    to_s
  end

  private

  # Refuse to touch htslib once the state has been released by #close
  # @raise [ClosedError]
  def check_open!
    raise ClosedError, "BaseMod is closed" if @closed
  end

  # Build Position object from hts_base_mod array
  # @param position [Integer] Query position
  # @param mods_ptr [FFI::Pointer] Pointer to array of HtsBaseMod structures
  # @param n_mods [Integer] Number of modifications
  # @return [Position] Position object
  def build_position(position, mods_ptr, n_mods)
    struct_size = LibHTS::HtsBaseMod.size

    modifications = Array.new(n_mods) do |i|
      mod_struct = LibHTS::HtsBaseMod.new(mods_ptr + i * struct_size)

      Modification.new(
        modified_base: mod_struct[:modified_base],
        canonical_base: mod_struct[:canonical_base],
        strand: mod_struct[:strand],
        qual: mod_struct[:qual]
      )
    end

    Position.new(position, modifications)
  end
end
342
+ end
343
+ end
@@ -111,6 +111,23 @@ module HTS
111
111
  name2tid(name)
112
112
  end
113
113
 
114
# Append a @PG (program) line to the header.
#
# Convenience wrapper around sam_hdr_add_pg which, per the upstream API:
# - generates a unique ID if the specified one clashes
# - manages PP (previous program) chains automatically
#
# Each option becomes a pair of string varargs (tag, value); both are
# converted with #to_s. The argument list is closed with a NULL pointer.
#
# @param program_name [String] Name of the program
# @param options [Hash] Key-value pairs for @PG tags (ID, PN, VN, CL, PP, etc.)
# @return [Integer] 0 on success, -1 on failure
#
# @example
#   header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
#   header.add_pg("samtools", VN: "1.15", PP: "bwa")
def add_pg(program_name, **options)
  varargs = options.each_with_object([]) do |(tag, value), acc|
    acc.push(:string, tag.to_s, :string, value.to_s)
  end
  varargs.push(:pointer, FFI::Pointer::NULL)

  LibHTS.sam_hdr_add_pg(@sam_hdr, program_name, *varargs)
end
130
+
114
131
  private
115
132
 
116
133
  def name2tid(name)
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTS
4
+ class Bam < Hts
5
# High-level mpileup iterator over multiple BAM/CRAM inputs
class Mpileup
  include Enumerable

  # Usage:
  #   HTS::Bam::Mpileup.open([bam1, bam2], region: "chr1:1-100") do |mpl|
  #     mpl.each { |cols| ... }
  #   end
  #
  # With a block the iterator is closed when the block exits (even on error).
  # Without a block the caller is responsible for calling #close.
  # @return [Mpileup]
  def self.open(*args, **kw)
    m = new(*args, **kw)
    return m unless block_given?

    begin
      yield m
    ensure
      m.close
    end
    m
  end

  # Normalize inputs to HTS::Bam instances and set up the mpileup iterator.
  #
  # If any step fails, everything acquired so far (files opened here, region
  # iterators, the mpileup iterator) is released before the error propagates.
  #
  # @param inputs [Array<HTS::Bam, String>] HTS::Bam objects or filenames
  # @param region [String, nil] Region string, or reference name when beg/end_ are given
  # @param beg [Integer, nil] Begin position (requires region and end_)
  # @param end_ [Integer, nil] End position (requires region and beg)
  # @param maxcnt [Integer, nil] Passed to bam_mplp_set_maxcnt when given
  # @param overlaps [Boolean] When true, bam_mplp_init_overlaps is called
  # @raise [ArgumentError] For empty inputs, unsupported input types, or beg/end_ given alone
  # @raise [RuntimeError] When an index is missing or an htslib call fails
  def initialize(inputs, region: nil, beg: nil, end_: nil, maxcnt: nil, overlaps: false)
    raise ArgumentError, "inputs must be non-empty" if inputs.nil? || inputs.empty?

    @owned_bams = [] # Bams we opened here; will be closed on close
    @bams = []
    @iters = []
    @data_blocks = [] # per-input packed pointers kept alive
    @data_array = nil
    @cb = nil
    @iter = nil

    begin
      open_inputs(inputs)
      @bams.each_with_index do |bam, i|
        @iters << region_iterator(bam, i, region, beg, end_)
      end
      build_callback_data
      build_read_callback
      start_mpileup(maxcnt, overlaps)
    rescue StandardError
      # Do not leak files or native iterators acquired before the failure
      close
      raise
    end
  end

  # Whether the underlying mpileup iterator is gone (never created or closed)
  # @return [Boolean]
  def closed?
    @iter.nil? || @iter.null?
  end

  # Yields an array of Pileup::PileupColumn (one per input) for each position
  # @return [Enumerator] when no block is given
  # @return [self] after iteration
  # @raise [IOError] if the iterator has been closed
  # @raise [RuntimeError] if bam_mplp64_auto reports an error (negative return)
  def each
    return to_enum(__method__) unless block_given?
    raise IOError, "closed Mpileup" if closed?

    n = @bams.length
    tid_ptr = FFI::MemoryPointer.new(:int)
    pos_ptr = FFI::MemoryPointer.new(:long_long)
    n_ptr = FFI::MemoryPointer.new(:int, n)
    plp_ptr = FFI::MemoryPointer.new(:pointer, n)
    plp1_size = HTS::LibHTS::BamPileup1.size
    headers = @bams.map(&:header)

    status = 0
    while (status = HTS::LibHTS.bam_mplp64_auto(@iter, tid_ptr, pos_ptr, n_ptr, plp_ptr)) > 0
      tid = tid_ptr.read_int
      pos = pos_ptr.read_long_long

      counts = n_ptr.read_array_of_int(n)
      plp_arr = plp_ptr.read_array_of_pointer(n)

      cols = Array.new(n) do |i|
        alignments =
          if counts[i] <= 0 || plp_arr[i].null?
            []
          else
            Array.new(counts[i]) do |j|
              entry = HTS::LibHTS::BamPileup1.new(plp_arr[i] + (j * plp1_size))
              HTS::Bam::Pileup::PileupRecord.new(entry, headers[i])
            end
          end
        HTS::Bam::Pileup::PileupColumn.new(tid: tid, pos: pos, alignments: alignments)
      end

      yield cols
    end

    # 0 ends the loop normally; a negative status must not be mistaken for EOF
    raise "HTSlib mpileup error (bam_mplp64_auto)" if status.negative?

    self
  end

  # Release the mpileup iterator, the per-input region iterators and the
  # files opened by this object. Safe to call more than once and on a
  # partially initialized object.
  # @return [nil]
  def close
    if @iter && !@iter.null?
      HTS::LibHTS.bam_mplp_destroy(@iter)
      @iter = FFI::Pointer::NULL
    end

    Array(@iters).each do |itr|
      HTS::LibHTS.hts_itr_destroy(itr) if itr && !itr.null?
    end
    @iters&.clear

    # @cb, @data_array and @data_blocks stay referenced by their instance
    # variables, so no extra keep-alive container is needed here.

    # Close owned bams opened by this object (best effort: one failing close
    # must not prevent the remaining files from being closed)
    Array(@owned_bams).each do |b|
      b.close
    rescue StandardError
      nil
    end
    @owned_bams&.clear
    nil
  end

  private

  # Fill @bams from the inputs; files opened here are also tracked in @owned_bams
  def open_inputs(inputs)
    inputs.each do |input|
      case input
      when HTS::Bam
        @bams << input
      when String
        bam = HTS::Bam.open(input)
        @owned_bams << bam
        @bams << bam
      else
        raise ArgumentError, "Unsupported input type: #{input.class}"
      end
    end
  end

  # Build the optional region iterator for one input
  # @return [FFI::Pointer, nil] iterator, or nil when no region was requested
  def region_iterator(bam, index, region, beg, end_)
    if region && beg.nil? && end_.nil?
      raise "Index required for region mpileup" unless bam.index_loaded?

      itr = HTS::LibHTS.sam_itr_querys(bam.instance_variable_get(:@idx), bam.header.struct, region)
      raise "Failed to query region on input ##{index}: #{region}" if itr.null?

      itr
    elsif region && beg && end_
      raise "Index required for region mpileup" unless bam.index_loaded?

      tid = bam.header.get_tid(region)
      itr = HTS::LibHTS.sam_itr_queryi(bam.instance_variable_get(:@idx), tid, beg, end_)
      raise "Failed to query region on input ##{index}: #{region} #{beg} #{end_}" if itr.null?

      itr
    elsif beg || end_
      raise ArgumentError, "beg and end_ must be specified together"
    end
  end

  # Build per-input packed pointer blocks so C passes them back to the callback.
  # Layout per input: [0] hts_fp (htsFile*), [1] hdr_struct (bam_hdr_t*), [2] itr (hts_itr_t* or NULL)
  def build_callback_data
    ptr_size = FFI.type_size(:pointer)
    data_array = FFI::MemoryPointer.new(:pointer, @bams.length)

    @bams.each_with_index do |bam, i|
      itr = @iters[i]
      block = FFI::MemoryPointer.new(:pointer, 3)
      block.put_pointer(0 * ptr_size, bam.instance_variable_get(:@hts_file))
      block.put_pointer(1 * ptr_size, bam.header.struct)
      block.put_pointer(2 * ptr_size, itr && !itr.null? ? itr : FFI::Pointer::NULL)
      @data_blocks << block
      data_array.put_pointer(i * ptr_size, block)
    end

    # Keep the array of per-input blocks alive while the C side holds on to them
    @data_array = data_array
  end

  # Build the read callback handed to bam_mplp_init
  def build_read_callback
    ptr_size = FFI.type_size(:pointer)

    @cb = FFI::Function.new(:int, %i[pointer pointer]) do |data, b|
      # Unpack pointers from the per-input block
      hts_fp = data.get_pointer(0 * ptr_size)
      hdr_struct = data.get_pointer(1 * ptr_size)
      itr = data.get_pointer(2 * ptr_size)
      # HTSlib contract: return same as sam_itr_next/sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
      if itr && !itr.null?
        HTS::LibHTS.sam_itr_next(hts_fp, itr, b)
      else
        HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
      end
    end
  end

  # Create the mpileup iterator and apply the optional settings
  def start_mpileup(maxcnt, overlaps)
    @iter = HTS::LibHTS.bam_mplp_init(@bams.length, @cb, @data_array)
    raise "bam_mplp_init failed" if @iter.null?

    HTS::LibHTS.bam_mplp_set_maxcnt(@iter, maxcnt) if maxcnt
    return unless overlaps

    rc = HTS::LibHTS.bam_mplp_init_overlaps(@iter)
    raise "bam_mplp_init_overlaps failed" if rc < 0
  end
end
174
+ end
175
+ end
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTS
4
+ class Bam < Hts
5
# High-level pileup iterator for a single SAM/BAM/CRAM
class Pileup
  include Enumerable

  # Usage:
  #   HTS::Bam::Pileup.open(bam, region: "chr1:1-100") do |pl|
  #     pl.each { |col| ... }
  #   end
  #
  # With a block the iterator is closed when the block exits (even on error).
  # Without a block the caller is responsible for calling #close.
  # @return [Pileup]
  def self.open(*args, **kw)
    pu = new(*args, **kw)
    return pu unless block_given?

    begin
      yield pu
    ensure
      pu.close
    end
    pu
  end

  # A column at a reference position with pileup alignments
  PileupColumn = Struct.new(:tid, :pos, :alignments, keyword_init: true) do
    # @return [Integer] number of alignments in this column
    def depth
      alignments.length
    end
  end

  # A wrapper of one bam_pileup1_t entry
  class PileupRecord
    # @param entry [#[]] bam_pileup1_t struct (anything indexable by field name)
    # @param header [HTS::Bam::Header] header used to build the Bam::Record
    def initialize(entry, header)
      @entry = entry
      @header = header
      @record = nil
    end

    # Return Bam::Record. On the first call, duplicate the underlying bam1_t (bam_dup1)
    # so the record becomes safe to keep beyond the current pileup step. Subsequent calls
    # return the cached Bam::Record instance.
    # NOTE: Without duplication, bam1_t memory may be reused by HTSlib on the next step.
    # @raise [RuntimeError] if bam_dup1 returns NULL
    def record
      return @record if @record

      # Normalize to a raw pointer and duplicate to obtain owned memory.
      b_ptr = @entry[:b].is_a?(FFI::Pointer) ? @entry[:b] : @entry[:b].to_ptr
      dup_ptr = HTS::LibHTS.bam_dup1(b_ptr)
      raise "bam_dup1 failed" if dup_ptr.null?

      # Build a Bam::Record backed by the duplicated bam1_t.
      @record = HTS::Bam::Record.new(@header, dup_ptr)
    end

    # @return [Integer] the entry's qpos field
    def query_position
      @entry[:qpos]
    end

    # @return [Integer] the entry's indel field
    def indel
      @entry[:indel]
    end

    # @return [Boolean] true when the is_del flag is 1
    def del?
      @entry[:is_del] == 1
    end

    # @return [Boolean] true when the is_head flag is 1
    def head?
      @entry[:is_head] == 1
    end

    # @return [Boolean] true when the is_tail flag is 1
    def tail?
      @entry[:is_tail] == 1
    end

    # @return [Boolean] true when the is_refskip flag is 1
    def refskip?
      @entry[:is_refskip] == 1
    end
  end

  # Create a Pileup iterator.
  #
  # If setup fails part-way, any region iterator already created is destroyed
  # before the error propagates.
  #
  # @param bam [HTS::Bam]
  # @param region [String, nil] Optional region string (requires index)
  # @param beg [Integer, nil] Optional begin when using tid/beg/end form
  # @param end_ [Integer, nil] Optional end when using tid/beg/end form
  # @param maxcnt [Integer, nil] Max per-position depth (capped)
  # @raise [ArgumentError] when only one of beg/end_ is usable
  # @raise [RuntimeError] when the index is missing or an htslib call fails
  def initialize(bam, region: nil, beg: nil, end_: nil, maxcnt: nil)
    @bam = bam
    @header = bam.header
    @itr = nil
    @cb = nil
    @plp = nil

    begin
      @itr = build_region_iterator(region, beg, end_)
      @cb = build_read_callback

      @plp = HTS::LibHTS.bam_plp_init(@cb, nil)
      raise "bam_plp_init failed" if @plp.null?

      HTS::LibHTS.bam_plp_set_maxcnt(@plp, maxcnt) if maxcnt
    rescue StandardError
      # Do not leak the native region iterator when a later step fails
      close
      raise
    end
  end

  # Whether the underlying pileup iterator is gone (never created or closed)
  # @return [Boolean]
  def closed?
    @plp.nil? || @plp.null?
  end

  # Yield one PileupColumn per position reported by bam_plp64_auto
  # @return [Enumerator] when no block is given
  # @return [self] after iteration
  # @raise [IOError] if the iterator has been closed
  # @raise [RuntimeError] if bam_plp64_auto reports an error
  def each
    return to_enum(__method__) unless block_given?
    raise IOError, "closed Pileup" if closed?

    tid_ptr = FFI::MemoryPointer.new(:int)
    pos_ptr = FFI::MemoryPointer.new(:long_long) # hts_pos_t
    n_ptr = FFI::MemoryPointer.new(:int)

    # Compute the constant struct size and header reference once
    plp1_size = HTS::LibHTS::BamPileup1.size
    header_local = @header

    loop do
      base_ptr = HTS::LibHTS.bam_plp64_auto(@plp, tid_ptr, pos_ptr, n_ptr)

      # When base_ptr is NULL, check n to distinguish EOF (n == 0) from error (n < 0)
      if base_ptr.null?
        raise "HTSlib pileup error (bam_plp64_auto)" if n_ptr.read_int < 0

        break
      end

      tid = tid_ptr.read_int
      pos = pos_ptr.read_long_long
      n = n_ptr.read_int

      alignments = Array.new(n) do |i|
        entry = HTS::LibHTS::BamPileup1.new(base_ptr + (i * plp1_size))
        PileupRecord.new(entry, header_local)
      end

      yield PileupColumn.new(tid: tid, pos: pos, alignments: alignments)
    end

    self
  end

  # Call bam_plp_reset on the pileup iterator (no-op when closed)
  def reset
    HTS::LibHTS.bam_plp_reset(@plp) if @plp && !@plp.null?
  end

  # Destroy the pileup iterator and the region iterator, if present.
  # Safe to call more than once. The callback stays referenced by @cb.
  # @return [nil]
  def close
    if @plp && !@plp.null?
      HTS::LibHTS.bam_plp_destroy(@plp)
      @plp = FFI::Pointer::NULL
    end
    if @itr && !@itr.null?
      HTS::LibHTS.hts_itr_destroy(@itr)
      @itr = FFI::Pointer::NULL
    end
    nil
  end

  private

  # Build the optional region iterator
  # @return [FFI::Pointer, nil] iterator, or nil when no region was requested
  def build_region_iterator(region, beg, end_)
    if region && beg.nil? && end_.nil?
      raise "Index file is required to use region pileup." unless @bam.index_loaded?

      itr = HTS::LibHTS.sam_itr_querys(@bam.instance_variable_get(:@idx), @header.struct, region)
      raise "Failed to query region: #{region}" if itr.null?

      itr
    elsif region && beg && end_
      raise "Index file is required to use region pileup." unless @bam.index_loaded?

      tid = @header.get_tid(region)
      itr = HTS::LibHTS.sam_itr_queryi(@bam.instance_variable_get(:@idx), tid, beg, end_)
      raise "Failed to query region: #{region} #{beg} #{end_}" if itr.null?

      itr
    elsif beg || end_
      raise ArgumentError, "beg and end_ must be specified together"
    end
  end

  # Build the auto callback for bam_plp_init.
  # Lookups are hoisted into locals and the callback is specialized per mode
  # so the per-read path has no branching.
  def build_read_callback
    hts_fp = @bam.instance_variable_get(:@hts_file)
    hdr_struct = @header.struct
    itr_local = @itr

    if itr_local && !itr_local.null?
      FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
        # HTSlib contract: return same as sam_itr_next (>= 0 on success, -1 on EOF, < -1 on error)
        HTS::LibHTS.sam_itr_next(hts_fp, itr_local, b)
      end
    else
      FFI::Function.new(:int, %i[pointer pointer]) do |_data, b|
        # HTSlib contract: return same as sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
        HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
      end
    end
  end
end
200
+ end
201
+ end
@@ -326,6 +326,13 @@ module HTS
326
326
  end
327
327
  end
328
328
 
329
# Build a BaseMod view over this record's MM/ML tags.
# @param auto_parse [Boolean] If true (default), parse lazily on first access
# @return [BaseMod] Base modification object bound to this record
def base_mod(auto_parse: true)
  options = { auto_parse: auto_parse }
  BaseMod.new(self, **options)
end
335
+
329
336
  # TODO: add a method to get the auxiliary fields as a hash.
330
337
 
331
338
  # TODO: add a method to set the auxiliary fields.
@@ -352,8 +359,13 @@ module HTS
352
359
  private
353
360
 
354
361
  def initialize_copy(orig)
362
+ super
355
363
  @header = orig.header
356
- @bam = LibHTS.bam_dup1(orig.struct)
364
+ # Deep-copy underlying bam1_t to detach from original buffer
365
+ dup_bam1 = LibHTS.bam_dup1(orig.struct)
366
+ raise "bam_dup1 failed" if dup_bam1.null?
367
+
368
+ @bam1 = dup_bam1
357
369
  end
358
370
  end
359
371
  end
data/lib/hts/bam.rb CHANGED
@@ -7,7 +7,9 @@ require_relative "bam/header"
7
7
  require_relative "bam/cigar"
8
8
  require_relative "bam/flag"
9
9
  require_relative "bam/record"
10
- # require_relative "bam/pileup"
10
+ require_relative "bam/base_mod"
11
+ require_relative "bam/pileup"
12
+ require_relative "bam/mpileup"
11
13
  # require_relative "bam/pileup_entry"
12
14
 
13
15
  module HTS
@@ -160,7 +162,7 @@ module HTS
160
162
 
161
163
  position = tell
162
164
  ary = map { |r| r.aux(tag) }
163
- seek(position)
165
+ seek(position) if position
164
166
  ary
165
167
  end
166
168
 
@@ -194,6 +196,13 @@ module HTS
194
196
  self
195
197
  end
196
198
 
199
+ # Iterate alignment records in this file.
200
+ #
201
+ # Performance and memory semantics:
202
+ # - copy: false (default) reuses a single Record instance and its underlying bam1_t buffer.
203
+ # The yielded Record MUST NOT be stored beyond the block; its content will be overwritten
204
+ # by the next iteration. If you need to retain it, call `rec = rec.dup`.
205
+ # - copy: true yields a fresh Record per iteration (deep-copied via bam_dup1). Slower, safe to keep.
197
206
  def each(copy: false, &block)
198
207
  if copy
199
208
  each_record_copy(&block)
@@ -202,23 +211,65 @@ module HTS
202
211
  end
203
212
  end
204
213
 
214
+ # Iterate records in a genomic region or multiple regions.
215
+ # See {#each} for copy semantics. When copy: false, the yielded Record is reused and should not be stored.
216
+ #
217
+ # @param region [String, Array<String>] Region specification(s)
218
+ # - Single region: "chr1:100-200" or "chr1" with beg/end parameters
219
+ # - Multiple regions: ["chr1:100-200", "chr2:500-600", ...]
220
+ # @param beg [Integer, nil] Start position (used with single string region)
221
+ # @param end_ [Integer, nil] End position (used with single string region)
222
+ # @param copy [Boolean] Whether to deep-copy records (see {#each})
223
+ #
224
+ # @example Single region query
225
+ # bam.query("chr1:100-200") { |r| puts r.qname }
226
+ # bam.query("chr1", 100, 200) { |r| puts r.qname }
227
+ #
228
+ # @example Multi-region query
229
+ # bam.query(["chr1:100-200", "chr2:500-600"]) { |r| puts r.qname }
205
230
  def query(region, beg = nil, end_ = nil, copy: false, &block)
206
231
  check_closed
207
232
  raise "Index file is required to call the query method." unless index_loaded?
208
233
 
209
- if beg && end_
210
- tid = header.get_tid(region)
211
- queryi(tid, beg, end_, copy:, &block)
212
- elsif beg.nil? && end_.nil?
213
- querys(region, copy:, &block)
234
+ case region
235
+ when Array
236
+ raise ArgumentError, "beg and end_ cannot be used with array of regions" if beg || end_
237
+
238
+ query_regions(region, copy:, &block)
239
+ when String
240
+ if beg && end_
241
+ tid = header.get_tid(region)
242
+ queryi(tid, beg, end_, copy:, &block)
243
+ elsif beg.nil? && end_.nil?
244
+ querys(region, copy:, &block)
245
+ else
246
+ raise ArgumentError, "beg and end_ must be specified together"
247
+ end
214
248
  else
215
- raise ArgumentError, "beg and end_ must be specified together"
249
+ raise ArgumentError, "region must be String or Array"
216
250
  end
217
251
  end
218
252
 
219
- # def pileup
220
- # Pileup.new(self)
221
- # end
253
# Pileup iterator over this file. Optional region can be specified.
#
# With a block, a Pileup is opened, every column is yielded, and the
# iterator is closed when the block finishes (even on error).
#
# Without a block, a lazy Enumerator is returned. Each enumeration opens its
# own Pileup and closes it when the enumeration ends, so no native iterator
# is left for the caller to clean up. Because the Pileup is created lazily,
# region/index errors surface when the Enumerator is first iterated.
#
# Note that beg is positional while end_ is a keyword argument.
#
# @param region [String, nil] region string like "chr1:100-200"
# @param beg [Integer, nil]
# @param end_ [Integer, nil]
# @param maxcnt [Integer, nil] cap on depth per position
# @yield [column] each pileup column
# @return [self] when a block is given
# @return [Enumerator] when no block is given
def pileup(region = nil, beg = nil, end_: nil, maxcnt: nil, &block)
  check_closed

  unless block
    return Enumerator.new do |yielder|
      Pileup.open(self, region:, beg:, end_:, maxcnt:) do |piter|
        piter.each { |column| yielder << column }
      end
    end
  end

  Pileup.open(self, region:, beg:, end_:, maxcnt:) do |piter|
    piter.each(&block)
  end
  self
end
222
273
 
223
274
  private
224
275
 
@@ -238,6 +289,17 @@ module HTS
238
289
  end
239
290
  end
240
291
 
292
# Multi-region query dispatcher: picks the copying or the record-reusing
# implementation according to +copy+ and forwards the block to it.
def query_regions(regions, copy: false, &block)
  return query_regions_copy(regions, &block) if copy

  query_regions_reuse(regions, &block)
end
300
+
301
+ # Internal: yield a single reused Record over the entire file.
302
+ # The underlying bam1_t is mutated on each iteration for speed.
241
303
  def each_record_reuse
242
304
  check_closed
243
305
  # Each does not always start at the beginning of the file.
@@ -250,6 +312,7 @@ module HTS
250
312
  self
251
313
  end
252
314
 
315
+ # Internal: yield deep-copied Records so callers may retain them safely.
253
316
  def each_record_copy
254
317
  check_closed
255
318
  return to_enum(__method__) unless block_given?
@@ -301,6 +364,7 @@ module HTS
301
364
  self
302
365
  end
303
366
 
367
+ # Internal: reused-Record iterator over a query iterator.
304
368
  def query_reuse_yield(qiter)
305
369
  bam1 = LibHTS.bam_init1
306
370
  record = Record.new(header, bam1)
@@ -323,5 +387,27 @@ module HTS
323
387
  ensure
324
388
  LibHTS.hts_itr_destroy(qiter)
325
389
  end
390
+
391
+ # Multi-region query using sequential single-region queries
392
+ # Note: This is a fallback implementation. Ideally we would use sam_itr_regarray
393
+ # but there seem to be issues with the multi-region iterator in the current setup.
394
+ def query_regions_reuse(regions, &block)
395
+ return to_enum(__method__, regions) unless block_given?
396
+
397
+ regions.each do |region|
398
+ querys_reuse(region, &block)
399
+ end
400
+ self
401
+ end
402
+
403
+ # Multi-region query with copied Records using sequential queries
404
+ def query_regions_copy(regions, &block)
405
+ return to_enum(__method__, regions) unless block_given?
406
+
407
+ regions.each do |region|
408
+ querys_copy(region, &block)
409
+ end
410
+ self
411
+ end
326
412
  end
327
413
  end
data/lib/hts/bcf.rb CHANGED
@@ -215,13 +215,20 @@ module HTS
215
215
  raise "query is only available for BCF files" unless file_format == "bcf"
216
216
  raise "Index file is required to call the query method." unless index_loaded?
217
217
 
218
- if beg && end_
219
- tid = header.name2id(region)
220
- queryi(tid, beg, end_, copy:, &block)
221
- elsif beg.nil? && end_.nil?
222
- querys(region, copy:, &block)
218
+ case region
219
+ when Array
220
+ raise ArgumentError, "beg and end must not be specified when region is an Array" unless beg.nil? && end_.nil?
221
+
222
+ query_regions(region, copy:, &block)
223
223
  else
224
- raise ArgumentError, "beg and end must be specified together"
224
+ if beg && end_
225
+ tid = header.name2id(region)
226
+ queryi(tid, beg, end_, copy:, &block)
227
+ elsif beg.nil? && end_.nil?
228
+ querys(region, copy:, &block)
229
+ else
230
+ raise ArgumentError, "beg and end must be specified together"
231
+ end
225
232
  end
226
233
  end
227
234
 
@@ -243,6 +250,14 @@ module HTS
243
250
  end
244
251
  end
245
252
 
253
+ def query_regions(regions, copy: false, &block)
254
+ if copy
255
+ query_regions_copy(regions, &block)
256
+ else
257
+ query_regions_reuse(regions, &block)
258
+ end
259
+ end
260
+
246
261
  def queryi_reuse(tid, beg, end_, &block)
247
262
  return to_enum(__method__, tid, beg, end_) unless block_given?
248
263
 
@@ -263,6 +278,15 @@ module HTS
263
278
  self
264
279
  end
265
280
 
281
+ def query_regions_reuse(regions, &block)
282
+ return to_enum(__method__, regions) unless block_given?
283
+
284
+ regions.each do |region|
285
+ querys_reuse(region, &block)
286
+ end
287
+ self
288
+ end
289
+
266
290
  def query_reuse_yield(qiter)
267
291
  bcf1 = LibHTS.bcf_init
268
292
  record = Record.new(header, bcf1)
@@ -299,6 +323,15 @@ module HTS
299
323
  self
300
324
  end
301
325
 
326
+ def query_regions_copy(regions, &block)
327
+ return to_enum(__method__, regions) unless block_given?
328
+
329
+ regions.each do |region|
330
+ querys_copy(region, &block)
331
+ end
332
+ self
333
+ end
334
+
302
335
  def query_copy_yield(qiter)
303
336
  loop do
304
337
  bcf1 = LibHTS.bcf_init
data/lib/hts/hts.rb CHANGED
@@ -13,7 +13,7 @@ module HTS
13
13
  check_closed
14
14
  position = tell
15
15
  ary = map(&name)
16
- seek(position)
16
+ seek(position) if position
17
17
  ary
18
18
  end
19
19
  end
@@ -4,7 +4,6 @@ module HTS
4
4
  # Module for working with C HTSlib.
5
5
  module LibHTS
6
6
  typedef :int64, :hts_pos_t
7
- typedef :pointer, :bam_plp_auto_f
8
7
 
9
8
  # kstring
10
9
 
@@ -352,6 +351,36 @@ module HTS
352
351
  end
353
352
  end
354
353
 
354
+ # Internal: Non-owning view of bam1_t used when the pointer is managed by HTSlib
355
+ # (e.g., pileup/mpileup). This struct mirrors the layout of bam1_t and MUST NOT
356
+ # free memory on GC. Do not expose publicly; use only for read-only access.
357
+ class Bam1View < FFI::Struct
358
+ layout \
359
+ :core, Bam1Core,
360
+ :id, :uint64,
361
+ :data, :pointer, # uint8_t
362
+ :l_data, :int,
363
+ :m_data, :uint32,
364
+ :_mempolicy, :uint32 # bit_fields
365
+ end
366
+
367
+ # Base modification structure
368
+ class HtsBaseMod < FFI::Struct
369
+ layout \
370
+ :modified_base, :int,
371
+ :canonical_base, :int,
372
+ :strand, :int,
373
+ :qual, :int
374
+ end
375
+
376
+ # Base modification state (opaque pointer)
377
+ # Use AutoPointer since the structure is opaque and we only need custom release.
378
+ class HtsBaseModState < FFI::AutoPointer
379
+ def self.release(ptr)
380
+ LibHTS.hts_base_mod_state_free(ptr) unless ptr.null?
381
+ end
382
+ end
383
+
355
384
  typedef :pointer, :bam_plp
356
385
  typedef :pointer, :bam_mplp
357
386
 
@@ -364,7 +393,7 @@ module HTS
364
393
 
365
394
  class BamPileup1 < FFI::BitStruct
366
395
  layout \
367
- :b, Bam1.ptr,
396
+ :b, :pointer,
368
397
  :qpos, :int32,
369
398
  :indel, :int,
370
399
  :level, :int,
@@ -2,6 +2,11 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
+ # Callback type for bam_plp_auto_f: int (*)(void *data, bam1_t *b)
6
+ # Use raw pointer for bam1_t to avoid creating ManagedStruct wrappers (which would double-free)
7
+ callback :bam_plp_auto_f, %i[pointer pointer], :int
8
+ # callback :bam_plp_auto_f, [:pointer, Bam1.by_ref], :int
9
+
5
10
  # Generates a new unpopulated header structure.
6
11
  attach_function \
7
12
  :sam_hdr_init,
@@ -414,24 +419,24 @@ module HTS
414
419
 
415
420
  attach_function \
416
421
  :sam_parse1,
417
- [KString, SamHdr, Bam1],
422
+ [KString, SamHdr, :pointer], # [KString, SamHdr, (Bam1 | Bam1View)]
418
423
  :int
419
424
 
420
425
  attach_function \
421
426
  :sam_format1,
422
- [SamHdr, Bam1, KString],
427
+ [SamHdr, :pointer, KString], # [SamHdr, (Bam1 | Bam1View), KString]
423
428
  :int
424
429
 
425
430
  # Read a record from a file
426
431
  attach_function \
427
432
  :sam_read1,
428
- [HtsFile, SamHdr, Bam1],
433
+ [HtsFile, SamHdr, :pointer], # [HtsFile, SamHdr, (Bam1 | Bam1View)]
429
434
  :int
430
435
 
431
436
  # Write a record to a file
432
437
  attach_function \
433
438
  :sam_write1,
434
- [HtsFile, SamHdr, Bam1],
439
+ [HtsFile, SamHdr, :pointer], # [HtsFile, SamHdr, (Bam1 | Bam1View)]
435
440
  :int
436
441
 
437
442
  # Checks whether a record passes an hts_filter.
@@ -555,28 +560,28 @@ module HTS
555
560
 
556
561
  attach_function \
557
562
  :bam_plp_push,
558
- [:bam_plp, Bam1],
563
+ [:bam_plp, Bam1.by_ref],
559
564
  :int
560
565
 
561
566
  attach_function \
562
567
  :bam_plp_next,
563
568
  %i[bam_plp pointer pointer pointer],
564
- BamPileup1.by_ref
569
+ :pointer # BamPileup1.by_ref
565
570
 
566
571
  attach_function \
567
572
  :bam_plp_auto,
568
573
  %i[bam_plp pointer pointer pointer],
569
- BamPileup1.by_ref
574
+ :pointer # BamPileup1.by_ref
570
575
 
571
576
  attach_function \
572
577
  :bam_plp64_next,
573
578
  %i[bam_plp pointer pointer pointer],
574
- BamPileup1.by_ref
579
+ :pointer # BamPileup1.by_ref
575
580
 
576
581
  attach_function \
577
582
  :bam_plp64_auto,
578
583
  %i[bam_plp pointer pointer pointer],
579
- BamPileup1.by_ref
584
+ :pointer # BamPileup1.by_ref
580
585
 
581
586
  attach_function \
582
587
  :bam_plp_set_maxcnt,
@@ -588,7 +593,9 @@ module HTS
588
593
  [:bam_plp],
589
594
  :void
590
595
 
591
- callback :bam_plp_callback_function, [:pointer, Bam1, BamPileupCd], :int
596
+ # Callback type for constructor/destructor: int (*)(void *data, const bam1_t *b, bam_pileup_cd *cd)
597
+ callback :bam_plp_callback_function, [:pointer, :pointer, BamPileupCd.by_ref], :int
598
+ # callback :bam_plp_callback_function, [:pointer, Bam1.by_ref, BamPileupCd.by_ref], :int
592
599
 
593
600
  # sets a callback to initialise any per-pileup1_t fields.
594
601
  attach_function \
@@ -602,17 +609,21 @@ module HTS
602
609
  :void
603
610
 
604
611
  # Get pileup padded insertion sequence
612
+ # Make pointer passing explicit by using by_ref for structs
605
613
  attach_function \
606
614
  :bam_plp_insertion,
607
- [BamPileup1, KString, :pointer],
615
+ [BamPileup1.by_ref, KString.by_ref, :pointer],
608
616
  :int
609
617
 
610
618
  # Get pileup padded insertion sequence, including base modifications
611
619
  attach_function \
612
620
  :bam_plp_insertion_mod,
613
- [BamPileup1, :pointer, KString, :pointer],
621
+ [BamPileup1.by_ref, HtsBaseModState, KString.by_ref, :pointer],
614
622
  :int
615
623
 
624
+ # NOTE: There is no bam_plp_init_overlaps in HTSlib (only bam_mplp_init_overlaps exists).
625
+ # The incorrect binding is removed to avoid undefined symbol errors.
626
+
616
627
  attach_function \
617
628
  :bam_mplp_init,
618
629
  %i[int bam_plp_auto_f pointer],
@@ -672,61 +683,61 @@ module HTS
672
683
  attach_function \
673
684
  :hts_base_mod_state_alloc,
674
685
  [],
675
- :pointer # hts_base_mod_state
686
+ HtsBaseModState
676
687
 
677
688
  # Destroys an hts_base_mode_state.
678
689
  attach_function \
679
690
  :hts_base_mod_state_free,
680
- [:pointer], # hts_base_mod_state
691
+ [HtsBaseModState],
681
692
  :void
682
693
 
683
694
  # Parses the MM and ML tags out of a bam record.
684
695
  attach_function \
685
696
  :bam_parse_basemod,
686
- [Bam1, :pointer],
697
+ [Bam1, HtsBaseModState],
687
698
  :int
688
699
 
689
700
  # Parses the MM and ML tags out of a bam record.
690
701
  attach_function \
691
702
  :bam_parse_basemod2,
692
- [Bam1, :pointer, :uint32],
703
+ [Bam1, HtsBaseModState, :uint32],
693
704
  :int
694
705
 
695
706
  # Returns modification status for the next base position in the query seq.
696
707
  attach_function \
697
708
  :bam_mods_at_next_pos,
698
- [Bam1, :pointer, :pointer, :int],
709
+ [Bam1, HtsBaseModState, :pointer, :int],
699
710
  :int
700
711
 
701
712
  # Finds the next location containing base modifications and returns them
702
713
  attach_function \
703
714
  :bam_next_basemod,
704
- [Bam1, :pointer, :pointer, :int, :pointer],
715
+ [Bam1, HtsBaseModState, :pointer, :int, :pointer],
705
716
  :int
706
717
 
707
718
  # Returns modification status for a specific query position.
708
719
  attach_function \
709
720
  :bam_mods_at_qpos,
710
- [Bam1, :int, :pointer, :pointer, :int],
721
+ [Bam1, :int, HtsBaseModState, :pointer, :int],
711
722
  :int
712
723
 
713
724
  # Returns data about a specific modification type for the alignment record.
714
725
  attach_function \
715
726
  :bam_mods_query_type,
716
- %i[pointer int pointer pointer string],
727
+ [HtsBaseModState, :int, :pointer, :pointer, :pointer],
717
728
  :int
718
729
 
719
730
  # Returns data about the i^th modification type for the alignment record.
720
731
  attach_function \
721
732
  :bam_mods_queryi,
722
- %i[pointer int pointer pointer string],
733
+ [HtsBaseModState, :int, :pointer, :pointer, :pointer],
723
734
  :int
724
735
 
725
736
  # Returns the list of base modification codes provided for this
726
737
  attach_function \
727
738
  :bam_mods_recorded,
728
- %i[pointer pointer],
729
- :int
739
+ [HtsBaseModState, :pointer],
740
+ :pointer
730
741
  end
731
742
  end
732
743
 
data/lib/hts/libhts.rb CHANGED
@@ -23,6 +23,12 @@ module HTS
23
23
  rescue FFI::NotFoundError => e
24
24
  warn e.message if $VERBOSE
25
25
  end
26
+
27
+ def self.attach_variable(*)
28
+ super
29
+ rescue FFI::NotFoundError => e
30
+ warn e.message if $VERBOSE
31
+ end
26
32
  end
27
33
  end
28
34
 
data/lib/hts/tabix.rb CHANGED
@@ -44,8 +44,27 @@ module HTS
44
44
  @idx = load_index(index)
45
45
  end
46
46
 
47
- def build_index
48
- raise "Not implemented yet"
47
+ def build_index(index_name = nil, min_shift: 0)
48
+ check_closed
49
+
50
+ if index_name
51
+ warn "Create index for #{@file_name} to #{index_name}"
52
+ case LibHTS.tbx_index_build2(@file_name, index_name, min_shift, LibHTS.tbx_conf_vcf)
53
+ when 0 # successful
54
+ when -1 then raise "general failure"
55
+ when -2 then raise "compression not BGZF"
56
+ else raise "unknown error"
57
+ end
58
+ else
59
+ warn "Create index for #{@file_name}"
60
+ case LibHTS.tbx_index_build(@file_name, min_shift, LibHTS.tbx_conf_vcf)
61
+ when 0 # successful
62
+ when -1 then raise "general failure"
63
+ when -2 then raise "compression not BGZF"
64
+ else raise "unknown error"
65
+ end
66
+ end
67
+ self # for method chaining
49
68
  end
50
69
 
51
70
  def load_index(index_name = nil)
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: htslib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
@@ -62,10 +62,13 @@ files:
62
62
  - TUTORIAL.md
63
63
  - lib/hts/bam.rb
64
64
  - lib/hts/bam/auxi.rb
65
+ - lib/hts/bam/base_mod.rb
65
66
  - lib/hts/bam/cigar.rb
66
67
  - lib/hts/bam/flag.rb
67
68
  - lib/hts/bam/header.rb
68
69
  - lib/hts/bam/header_record.rb
70
+ - lib/hts/bam/mpileup.rb
71
+ - lib/hts/bam/pileup.rb
69
72
  - lib/hts/bam/record.rb
70
73
  - lib/hts/bcf.rb
71
74
  - lib/hts/bcf/format.rb
@@ -116,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
119
  - !ruby/object:Gem::Version
117
120
  version: '0'
118
121
  requirements: []
119
- rubygems_version: 3.7.1
122
+ rubygems_version: 3.6.9
120
123
  specification_version: 4
121
124
  summary: HTSlib bindings for Ruby
122
125
  test_files: []