htslib 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ed0d57a77d113e37ce3a9c8bf75ad8e35640a82eb51d7af09f39409c26a04ae
4
- data.tar.gz: a5a4092321c2245fd4416ffe2b6b50014b5a6af1a40f20a4ca55a1296b96a2ff
3
+ metadata.gz: 86838244ad390e04fb293d3db9fd639c837dcd7b2309cc0c83da0b5925e7185c
4
+ data.tar.gz: 547063279b934706efcf919d287217f4231fe76d2e8fb3ae6df8e4dd7b6040ad
5
5
  SHA512:
6
- metadata.gz: abd2f5234927ee1ba553c40194865ccfca47e24338c0859b46c370f248d9f7e602068500d6a49bd80b8cb4a12ef3f3e78efb49ff3d30b628bc1fea519d3d3fd6
7
- data.tar.gz: af733241e107742ddc702619ac8ab226daa72bdc515c9f591bb3317be1d3f6556070df8afa664066aca6b38466a84cf9c45c4b76e32c68d607ada759818f7f1c
6
+ metadata.gz: 787f9167abef88ddbd758d3155f1f3568bb99d8a5a340086a1cc677be46e0e77ec5a0ef56d201610f06143d0f54a20a1f2563ae3e080996564b5b5545e3e8a2a
7
+ data.tar.gz: d07ae5e437c9729a39e32adb12ff694560eaa33e44094679199bc8b742ccaab4c5a111c7e6391a314a8b6a19fae82acea549123b57497690678e1b2b1746f2c7
data/TUTORIAL.md CHANGED
@@ -254,6 +254,23 @@ in.close
254
254
  out.close
255
255
  ```
256
256
 
257
+ Update INFO fields
258
+
259
+ ```ruby
260
+ bcf = HTS::Bcf.open("in.vcf")
261
+ record = bcf.first
262
+ info = record.info
263
+
264
+ info.update_int("DP", [30])
265
+ # info.update_int64("DP", [2**40]) # Backend-dependent (BCF_HT_LONG)
266
+ info.update_float("AF", [0.25])
267
+ info.update_string("STR", "sample")
268
+ info.update_flag("SOMATIC", true)
269
+
270
+ info["DP"] = 100
271
+ # Out-of-int32 integers require explicit update_int64
272
+ ```
273
+
257
274
  Writing and modifying auxiliary tags
258
275
 
259
276
  ```ruby
@@ -277,9 +294,14 @@ in_bam.each do |record|
277
294
 
278
295
  # Update or add tags using type-specific methods
279
296
  aux.update_int("AS", 100) # Integer tag
297
+ aux.update_uint8("XI", 255) # Exact unsigned 8-bit integer tag
280
298
  aux.update_float("ZQ", 0.95) # Float tag
299
+ aux.update_double("ZD", 0.125) # Double tag
300
+ aux.update_char("XC", "Y") # Character tag
301
+ aux.update_hex("XH", "DEADBEEF") # Hex string tag
281
302
  aux.update_string("RG", "sample1") # String tag
282
- aux.update_array("BC", [25, 30, 28, 32]) # Array tag
303
+ aux.update_array("BC", [25, 30, 28, 32]) # Array tag (default subtype: i)
304
+ aux.update_array("BQ", [25, 30, 28, 32], type: "C") # Array tag with explicit subtype
283
305
 
284
306
  # Or use the []= operator (auto-detects type)
285
307
  aux["NM"] = 2 # Integer
data/lib/hts/bam/auxi.rb CHANGED
@@ -79,17 +79,49 @@ module HTS
79
79
  # @param key [String] tag name (2 characters)
80
80
  # @param value [Integer] integer value
81
81
  def update_int(key, value)
82
+ validate_tag!(key)
82
83
  ret = LibHTS.bam_aux_update_int(@record.struct, key, value.to_i)
83
84
  raise "Failed to update integer tag '#{key}': errno #{FFI.errno}" if ret < 0
84
85
 
85
86
  value
86
87
  end
87
88
 
89
+ # Update or add a signed 8-bit integer tag.
90
+ def update_int8(key, value)
91
+ update_exact_integer(key, value, "c", -128, 127)
92
+ end
93
+
94
+ # Update or add an unsigned 8-bit integer tag.
95
+ def update_uint8(key, value)
96
+ update_exact_integer(key, value, "C", 0, 255)
97
+ end
98
+
99
+ # Update or add a signed 16-bit integer tag.
100
+ def update_int16(key, value)
101
+ update_exact_integer(key, value, "s", -32_768, 32_767)
102
+ end
103
+
104
+ # Update or add an unsigned 16-bit integer tag.
105
+ def update_uint16(key, value)
106
+ update_exact_integer(key, value, "S", 0, 65_535)
107
+ end
108
+
109
+ # Update or add a signed 32-bit integer tag.
110
+ def update_int32(key, value)
111
+ update_exact_integer(key, value, "i", -2_147_483_648, 2_147_483_647)
112
+ end
113
+
114
+ # Update or add an unsigned 32-bit integer tag.
115
+ def update_uint32(key, value)
116
+ update_exact_integer(key, value, "I", 0, 4_294_967_295)
117
+ end
118
+
88
119
  # Update or add a floating-point tag
89
120
  # For compatibility with HTS.cr.
90
121
  # @param key [String] tag name (2 characters)
91
122
  # @param value [Float] floating-point value
92
123
  def update_float(key, value)
124
+ validate_tag!(key)
93
125
  ret = LibHTS.bam_aux_update_float(@record.struct, key, value.to_f)
94
126
  raise "Failed to update float tag '#{key}': errno #{FFI.errno}" if ret < 0
95
127
 
@@ -101,10 +133,43 @@ module HTS
101
133
  # @param key [String] tag name (2 characters)
102
134
  # @param value [String] string value
103
135
  def update_string(key, value)
104
- ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, value.to_s)
136
+ validate_tag!(key)
137
+ string = value.to_s
138
+ validate_string_value!(string)
139
+ ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, string)
105
140
  raise "Failed to update string tag '#{key}': errno #{FFI.errno}" if ret < 0
106
141
 
107
- value
142
+ string
143
+ end
144
+
145
+ # Update or add a character tag.
146
+ def update_char(key, value)
147
+ validate_tag!(key)
148
+
149
+ string = value.to_s
150
+ validate_char_value!(string)
151
+
152
+ replace_with_append(key, "A", string.b)
153
+ string
154
+ end
155
+
156
+ # Update or add a hexadecimal string tag.
157
+ def update_hex(key, value)
158
+ validate_tag!(key)
159
+
160
+ string = value.to_s
161
+ validate_hex_value!(string)
162
+
163
+ replace_with_append(key, "H", string.b + "\0")
164
+ string
165
+ end
166
+
167
+ # Update or add a double-precision floating-point tag.
168
+ def update_double(key, value)
169
+ validate_tag!(key)
170
+
171
+ replace_with_append(key, "d", [Float(value)].pack("E"))
172
+ value.to_f
108
173
  end
109
174
 
110
175
  # Update or add an array tag
@@ -113,6 +178,7 @@ module HTS
113
178
  # @param value [Array] array of integers or floats
114
179
  # @param type [String, nil] element type ('c', 'C', 's', 'S', 'i', 'I', 'f'). Auto-detected if nil.
115
180
  def update_array(key, value, type: nil)
181
+ validate_tag!(key)
116
182
  raise ArgumentError, "Array cannot be empty" if value.empty?
117
183
 
118
184
  # Auto-detect type if not specified
@@ -127,21 +193,10 @@ module HTS
127
193
  end
128
194
  end
129
195
 
130
- # Convert array to appropriate C type
131
- case type
132
- when "c", "C", "s", "S", "i", "I"
133
- # Integer types
134
- ptr = FFI::MemoryPointer.new(:int32, value.size)
135
- ptr.write_array_of_int32(value.map(&:to_i))
136
- ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
137
- when "f"
138
- # Float type
139
- ptr = FFI::MemoryPointer.new(:float, value.size)
140
- ptr.write_array_of_float(value.map(&:to_f))
141
- ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
142
- else
143
- raise ArgumentError, "Invalid array type: #{type}"
144
- end
196
+ payload = pack_array_payload(value, type)
197
+ ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
198
+ ptr.put_bytes(0, payload)
199
+ ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
145
200
 
146
201
  raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if ret < 0
147
202
 
@@ -180,7 +235,7 @@ module HTS
180
235
  get_ruby_aux(aux_ptr)
181
236
  end
182
237
 
183
- def each
238
+ def each_value
184
239
  return enum_for(__method__) unless block_given?
185
240
 
186
241
  aux_ptr = first_pointer
@@ -193,6 +248,45 @@ module HTS
193
248
  end
194
249
  end
195
250
 
251
+ # Iterate auxiliary tags as key-value pairs.
252
+ #
253
+ # @yieldparam tag [String] 2-byte AUX tag name
254
+ # @yieldparam value [Object] Ruby representation of the AUX value
255
+ def each
256
+ return enum_for(__method__) unless block_given?
257
+
258
+ aux_ptr = first_pointer
259
+ return nil if aux_ptr.null?
260
+
261
+ loop do
262
+ tag = FFI::Pointer.new(aux_ptr.address - 2).read_string(2)
263
+ yield tag, get_ruby_aux(aux_ptr)
264
+ aux_ptr = LibHTS.bam_aux_next(@record.struct, aux_ptr)
265
+ break if aux_ptr.null?
266
+ end
267
+ end
268
+
269
+ alias each_pair each
270
+
271
+ # Iterate auxiliary tags with their SAM/BAM type.
272
+ #
273
+ # @yieldparam tag [String] 2-byte AUX tag name
274
+ # @yieldparam type [String] AUX type, e.g. "i", "Z", or "B:C"
275
+ # @yieldparam value [Object] Ruby representation of the AUX value
276
+ def each_with_type
277
+ return enum_for(__method__) unless block_given?
278
+
279
+ aux_ptr = first_pointer
280
+ return nil if aux_ptr.null?
281
+
282
+ loop do
283
+ tag = FFI::Pointer.new(aux_ptr.address - 2).read_string(2)
284
+ yield tag, aux_type(aux_ptr), get_ruby_aux(aux_ptr)
285
+ aux_ptr = LibHTS.bam_aux_next(@record.struct, aux_ptr)
286
+ break if aux_ptr.null?
287
+ end
288
+ end
289
+
196
290
  def to_h
197
291
  h = {}
198
292
  aux_ptr = first_pointer
@@ -213,8 +307,120 @@ module HTS
213
307
  LibHTS.bam_aux_first(@record.struct)
214
308
  end
215
309
 
310
+ def aux_type(aux_ptr)
311
+ type = aux_ptr.read_string(1)
312
+ return type unless type == "B"
313
+
314
+ "#{type}:#{aux_ptr.read_string(2)[1]}"
315
+ end
316
+
317
+ def validate_tag!(key)
318
+ return if key.is_a?(String) && key.bytesize == 2 && key.ascii_only?
319
+
320
+ raise ArgumentError, "AUX tag must be a 2-byte ASCII String"
321
+ end
322
+
323
+ def validate_string_value!(string)
324
+ return unless string.include?("\0")
325
+
326
+ raise ArgumentError, "String AUX tags must not contain NUL bytes"
327
+ end
328
+
329
+ def validate_char_value!(string)
330
+ return if string.bytesize == 1 && string.ascii_only? && /\A[!-~]\z/.match?(string)
331
+
332
+ raise ArgumentError, "Character AUX tags must be a single printable ASCII byte"
333
+ end
334
+
335
+ def validate_hex_value!(string)
336
+ raise ArgumentError, "Hex AUX tags must contain an even number of characters" if string.bytesize.odd?
337
+
338
+ return if string.ascii_only? && /\A[0-9A-Fa-f]*\z/.match?(string)
339
+
340
+ raise ArgumentError,
341
+ "Hex AUX tags must contain only ASCII hexadecimal characters"
342
+ end
343
+
344
+ def update_exact_integer(key, value, type, min, max)
345
+ validate_tag!(key)
346
+
347
+ integer = Integer(value)
348
+ raise RangeError, "Value #{integer} is out of range for AUX type #{type}" unless integer.between?(min, max)
349
+
350
+ replace_with_append(key, type, pack_scalar_payload(integer, type))
351
+ integer
352
+ end
353
+
354
+ def replace_with_append(key, type, payload)
355
+ delete(key) if key?(key)
356
+
357
+ ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
358
+ ptr.put_bytes(0, payload)
359
+ ret = LibHTS.bam_aux_append(@record.struct, key, type.ord, payload.bytesize, ptr)
360
+ raise "Failed to update #{type} tag '#{key}': errno #{FFI.errno}" if ret < 0
361
+
362
+ true
363
+ end
364
+
365
+ def pack_scalar_payload(value, type)
366
+ case type
367
+ when "c"
368
+ [value].pack("c")
369
+ when "C"
370
+ [value].pack("C")
371
+ when "s"
372
+ [value].pack("s<")
373
+ when "S"
374
+ [value].pack("S<")
375
+ when "i"
376
+ [value].pack("l<")
377
+ when "I"
378
+ [value].pack("L<")
379
+ else
380
+ raise ArgumentError, "Unsupported scalar AUX type: #{type}"
381
+ end
382
+ end
383
+
384
+ def pack_array_payload(value, type)
385
+ case type
386
+ when "c"
387
+ validate_integer_array_range!(value, -128, 127, type)
388
+ value.pack("c*")
389
+ when "C"
390
+ validate_integer_array_range!(value, 0, 255, type)
391
+ value.pack("C*")
392
+ when "s"
393
+ validate_integer_array_range!(value, -32_768, 32_767, type)
394
+ value.pack("s<*")
395
+ when "S"
396
+ validate_integer_array_range!(value, 0, 65_535, type)
397
+ value.pack("S<*")
398
+ when "i"
399
+ validate_integer_array_range!(value, -2_147_483_648, 2_147_483_647, type)
400
+ value.pack("l<*")
401
+ when "I"
402
+ validate_integer_array_range!(value, 0, 4_294_967_295, type)
403
+ value.pack("L<*")
404
+ when "f"
405
+ value.map(&:to_f).pack("e*")
406
+ else
407
+ raise ArgumentError, "Invalid array type: #{type}"
408
+ end
409
+ end
410
+
411
+ def validate_integer_array_range!(value, min, max, type)
412
+ value.each do |element|
413
+ integer = Integer(element)
414
+ unless integer.between?(min, max)
415
+ raise RangeError, "Array element #{integer} is out of range for AUX array type #{type}"
416
+ end
417
+ end
418
+ end
419
+
216
420
  def get_ruby_aux(aux_ptr, type = nil)
217
- type = type ? type.to_s : aux_ptr.read_string(1)
421
+ actual_type = aux_ptr.read_string(1)
422
+ type = type ? type.to_s : actual_type
423
+ validate_aux_type!(actual_type, type)
218
424
 
219
425
  # A (character), B (general array),
220
426
  # f (real number), H (hexadecimal array),
@@ -246,6 +452,27 @@ module HTS
246
452
  raise NotImplementedError, "type: #{type}"
247
453
  end
248
454
  end
455
+
456
+ def validate_aux_type!(actual_type, requested_type)
457
+ return if aux_type_compatible?(actual_type, requested_type)
458
+
459
+ raise TypeError, "AUX type mismatch: requested #{requested_type.inspect}, actual #{actual_type.inspect}"
460
+ end
461
+
462
+ def aux_type_compatible?(actual_type, requested_type)
463
+ case requested_type
464
+ when "i", "I", "c", "C", "s", "S"
465
+ %w[i I c C s S].include?(actual_type)
466
+ when "f", "d"
467
+ %w[f d].include?(actual_type)
468
+ when "Z", "H"
469
+ %w[Z H].include?(actual_type)
470
+ when "A", "B"
471
+ actual_type == requested_type
472
+ else
473
+ true
474
+ end
475
+ end
249
476
  end
250
477
  end
251
478
  end
data/lib/hts/bam/cigar.rb CHANGED
@@ -15,11 +15,19 @@ module HTS
15
15
  def self.parse(str)
16
16
  c = FFI::MemoryPointer.new(:pointer)
17
17
  m = FFI::MemoryPointer.new(:size_t)
18
- LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
19
- cigar_array = c.read_pointer.read_array_of_uint32(m.read(:size_t))
18
+ c.write_pointer(FFI::Pointer::NULL)
19
+ m.write(:size_t, 0)
20
+ ptr = nil
21
+ n_cigar = LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
22
+ raise "sam_parse_cigar failed: #{n_cigar}" if n_cigar.negative?
23
+
24
+ ptr = c.read_pointer
25
+ cigar_array = ptr.null? ? [] : ptr.read_array_of_uint32(n_cigar)
20
26
  obj = new
21
27
  obj.array = cigar_array
22
28
  obj
29
+ ensure
30
+ LibHTS.hts_free(ptr) if ptr && !ptr.null?
23
31
  end
24
32
 
25
33
  def initialize(record = nil)