htslib 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ed0d57a77d113e37ce3a9c8bf75ad8e35640a82eb51d7af09f39409c26a04ae
4
- data.tar.gz: a5a4092321c2245fd4416ffe2b6b50014b5a6af1a40f20a4ca55a1296b96a2ff
3
+ metadata.gz: 26b019acbf5b8e110829d2666710f9187923edc57a4af486be8fc4ac690cd0e0
4
+ data.tar.gz: 8edf3b8fc44ce5ce42f0d09f2dcfffdb3dd4b145e72aac28bf13b4daa6d952ad
5
5
  SHA512:
6
- metadata.gz: abd2f5234927ee1ba553c40194865ccfca47e24338c0859b46c370f248d9f7e602068500d6a49bd80b8cb4a12ef3f3e78efb49ff3d30b628bc1fea519d3d3fd6
7
- data.tar.gz: af733241e107742ddc702619ac8ab226daa72bdc515c9f591bb3317be1d3f6556070df8afa664066aca6b38466a84cf9c45c4b76e32c68d607ada759818f7f1c
6
+ metadata.gz: 6429da59067ed17e863117a98320304eb9445b82eb0a8ed94c6afe0b5c01cd149797e068cfa6c14cb32f7e60c0cfeedfc9c49d47cf7a5d92191a3818ad8f63a6
7
+ data.tar.gz: 5f6ce5d70616eb23213ebde0a77b1a0efb66664263c839eb3d00bc08d957c77655d9b03c426573e3db7b5a9d459f8ec86077e771b55b51523fbda520ac82cfcb
data/TUTORIAL.md CHANGED
@@ -254,6 +254,23 @@ in.close
254
254
  out.close
255
255
  ```
256
256
 
257
+ Update INFO fields
258
+
259
+ ```ruby
260
+ bcf = HTS::Bcf.open("in.vcf")
261
+ record = bcf.first
262
+ info = record.info
263
+
264
+ info.update_int("DP", [30])
265
+ # info.update_int64("DP", [2**40]) # Backend-dependent (BCF_HT_LONG)
266
+ info.update_float("AF", [0.25])
267
+ info.update_string("STR", "sample")
268
+ info.update_flag("SOMATIC", true)
269
+
270
+ info["DP"] = 100
271
+ # Out-of-int32 integers require explicit update_int64
272
+ ```
273
+
257
274
  Writing and modifying auxiliary tags
258
275
 
259
276
  ```ruby
@@ -277,9 +294,14 @@ in_bam.each do |record|
277
294
 
278
295
  # Update or add tags using type-specific methods
279
296
  aux.update_int("AS", 100) # Integer tag
297
+ aux.update_uint8("XI", 255) # Exact unsigned 8-bit integer tag
280
298
  aux.update_float("ZQ", 0.95) # Float tag
299
+ aux.update_double("ZD", 0.125) # Double tag
300
+ aux.update_char("XC", "Y") # Character tag
301
+ aux.update_hex("XH", "DEADBEEF") # Hex string tag
281
302
  aux.update_string("RG", "sample1") # String tag
282
- aux.update_array("BC", [25, 30, 28, 32]) # Array tag
303
+ aux.update_array("BC", [25, 30, 28, 32]) # Array tag (default subtype: i)
304
+ aux.update_array("BQ", [25, 30, 28, 32], type: "C") # Array tag with explicit subtype
283
305
 
284
306
  # Or use the []= operator (auto-detects type)
285
307
  aux["NM"] = 2 # Integer
data/lib/hts/bam/auxi.rb CHANGED
@@ -79,17 +79,49 @@ module HTS
79
79
  # @param key [String] tag name (2 characters)
80
80
  # @param value [Integer] integer value
81
81
  def update_int(key, value)
82
+ validate_tag!(key)
82
83
  ret = LibHTS.bam_aux_update_int(@record.struct, key, value.to_i)
83
84
  raise "Failed to update integer tag '#{key}': errno #{FFI.errno}" if ret < 0
84
85
 
85
86
  value
86
87
  end
87
88
 
89
+ # Update or add a signed 8-bit integer tag.
90
+ def update_int8(key, value)
91
+ update_exact_integer(key, value, "c", -128, 127)
92
+ end
93
+
94
+ # Update or add an unsigned 8-bit integer tag.
95
+ def update_uint8(key, value)
96
+ update_exact_integer(key, value, "C", 0, 255)
97
+ end
98
+
99
+ # Update or add a signed 16-bit integer tag.
100
+ def update_int16(key, value)
101
+ update_exact_integer(key, value, "s", -32_768, 32_767)
102
+ end
103
+
104
+ # Update or add an unsigned 16-bit integer tag.
105
+ def update_uint16(key, value)
106
+ update_exact_integer(key, value, "S", 0, 65_535)
107
+ end
108
+
109
+ # Update or add a signed 32-bit integer tag.
110
+ def update_int32(key, value)
111
+ update_exact_integer(key, value, "i", -2_147_483_648, 2_147_483_647)
112
+ end
113
+
114
+ # Update or add an unsigned 32-bit integer tag.
115
+ def update_uint32(key, value)
116
+ update_exact_integer(key, value, "I", 0, 4_294_967_295)
117
+ end
118
+
88
119
  # Update or add a floating-point tag
89
120
  # For compatibility with HTS.cr.
90
121
  # @param key [String] tag name (2 characters)
91
122
  # @param value [Float] floating-point value
92
123
  def update_float(key, value)
124
+ validate_tag!(key)
93
125
  ret = LibHTS.bam_aux_update_float(@record.struct, key, value.to_f)
94
126
  raise "Failed to update float tag '#{key}': errno #{FFI.errno}" if ret < 0
95
127
 
@@ -101,10 +133,43 @@ module HTS
101
133
  # @param key [String] tag name (2 characters)
102
134
  # @param value [String] string value
103
135
  def update_string(key, value)
104
- ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, value.to_s)
136
+ validate_tag!(key)
137
+ string = value.to_s
138
+ validate_string_value!(string)
139
+ ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, string)
105
140
  raise "Failed to update string tag '#{key}': errno #{FFI.errno}" if ret < 0
106
141
 
107
- value
142
+ string
143
+ end
144
+
145
+ # Update or add a character tag.
146
+ def update_char(key, value)
147
+ validate_tag!(key)
148
+
149
+ string = value.to_s
150
+ validate_char_value!(string)
151
+
152
+ replace_with_append(key, "A", string.b)
153
+ string
154
+ end
155
+
156
+ # Update or add a hexadecimal string tag.
157
+ def update_hex(key, value)
158
+ validate_tag!(key)
159
+
160
+ string = value.to_s
161
+ validate_hex_value!(string)
162
+
163
+ replace_with_append(key, "H", string.b + "\0")
164
+ string
165
+ end
166
+
167
+ # Update or add a double-precision floating-point tag.
168
+ def update_double(key, value)
169
+ validate_tag!(key)
170
+
171
+ replace_with_append(key, "d", [Float(value)].pack("E"))
172
+ value.to_f
108
173
  end
109
174
 
110
175
  # Update or add an array tag
@@ -113,6 +178,7 @@ module HTS
113
178
  # @param value [Array] array of integers or floats
114
179
  # @param type [String, nil] element type ('c', 'C', 's', 'S', 'i', 'I', 'f'). Auto-detected if nil.
115
180
  def update_array(key, value, type: nil)
181
+ validate_tag!(key)
116
182
  raise ArgumentError, "Array cannot be empty" if value.empty?
117
183
 
118
184
  # Auto-detect type if not specified
@@ -127,21 +193,10 @@ module HTS
127
193
  end
128
194
  end
129
195
 
130
- # Convert array to appropriate C type
131
- case type
132
- when "c", "C", "s", "S", "i", "I"
133
- # Integer types
134
- ptr = FFI::MemoryPointer.new(:int32, value.size)
135
- ptr.write_array_of_int32(value.map(&:to_i))
136
- ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
137
- when "f"
138
- # Float type
139
- ptr = FFI::MemoryPointer.new(:float, value.size)
140
- ptr.write_array_of_float(value.map(&:to_f))
141
- ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
142
- else
143
- raise ArgumentError, "Invalid array type: #{type}"
144
- end
196
+ payload = pack_array_payload(value, type)
197
+ ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
198
+ ptr.put_bytes(0, payload)
199
+ ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
145
200
 
146
201
  raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if ret < 0
147
202
 
@@ -180,7 +235,7 @@ module HTS
180
235
  get_ruby_aux(aux_ptr)
181
236
  end
182
237
 
183
- def each
238
+ def each_value
184
239
  return enum_for(__method__) unless block_given?
185
240
 
186
241
  aux_ptr = first_pointer
@@ -193,6 +248,27 @@ module HTS
193
248
  end
194
249
  end
195
250
 
251
+ # Iterate auxiliary tags with their SAM/BAM type.
252
+ #
253
+ # @yieldparam tag [String] 2-byte AUX tag name
254
+ # @yieldparam type [String] AUX type, e.g. "i", "Z", or "B:C"
255
+ # @yieldparam value [Object] Ruby representation of the AUX value
256
+ def each
257
+ return enum_for(__method__) unless block_given?
258
+
259
+ aux_ptr = first_pointer
260
+ return nil if aux_ptr.null?
261
+
262
+ loop do
263
+ tag = FFI::Pointer.new(aux_ptr.address - 2).read_string(2)
264
+ yield tag, aux_type(aux_ptr), get_ruby_aux(aux_ptr)
265
+ aux_ptr = LibHTS.bam_aux_next(@record.struct, aux_ptr)
266
+ break if aux_ptr.null?
267
+ end
268
+ end
269
+
270
+ alias each_pair each
271
+
196
272
  def to_h
197
273
  h = {}
198
274
  aux_ptr = first_pointer
@@ -213,8 +289,120 @@ module HTS
213
289
  LibHTS.bam_aux_first(@record.struct)
214
290
  end
215
291
 
292
+ def aux_type(aux_ptr)
293
+ type = aux_ptr.read_string(1)
294
+ return type unless type == "B"
295
+
296
+ "#{type}:#{aux_ptr.read_string(2)[1]}"
297
+ end
298
+
299
+ def validate_tag!(key)
300
+ return if key.is_a?(String) && key.bytesize == 2 && key.ascii_only?
301
+
302
+ raise ArgumentError, "AUX tag must be a 2-byte ASCII String"
303
+ end
304
+
305
+ def validate_string_value!(string)
306
+ return unless string.include?("\0")
307
+
308
+ raise ArgumentError, "String AUX tags must not contain NUL bytes"
309
+ end
310
+
311
+ def validate_char_value!(string)
312
+ return if string.bytesize == 1 && string.ascii_only? && /\A[!-~]\z/.match?(string)
313
+
314
+ raise ArgumentError, "Character AUX tags must be a single printable ASCII byte"
315
+ end
316
+
317
+ def validate_hex_value!(string)
318
+ raise ArgumentError, "Hex AUX tags must contain an even number of characters" if string.bytesize.odd?
319
+
320
+ return if string.ascii_only? && /\A[0-9A-Fa-f]*\z/.match?(string)
321
+
322
+ raise ArgumentError,
323
+ "Hex AUX tags must contain only ASCII hexadecimal characters"
324
+ end
325
+
326
+ def update_exact_integer(key, value, type, min, max)
327
+ validate_tag!(key)
328
+
329
+ integer = Integer(value)
330
+ raise RangeError, "Value #{integer} is out of range for AUX type #{type}" unless integer.between?(min, max)
331
+
332
+ replace_with_append(key, type, pack_scalar_payload(integer, type))
333
+ integer
334
+ end
335
+
336
+ def replace_with_append(key, type, payload)
337
+ delete(key) if key?(key)
338
+
339
+ ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
340
+ ptr.put_bytes(0, payload)
341
+ ret = LibHTS.bam_aux_append(@record.struct, key, type.ord, payload.bytesize, ptr)
342
+ raise "Failed to update #{type} tag '#{key}': errno #{FFI.errno}" if ret < 0
343
+
344
+ true
345
+ end
346
+
347
+ def pack_scalar_payload(value, type)
348
+ case type
349
+ when "c"
350
+ [value].pack("c")
351
+ when "C"
352
+ [value].pack("C")
353
+ when "s"
354
+ [value].pack("s<")
355
+ when "S"
356
+ [value].pack("S<")
357
+ when "i"
358
+ [value].pack("l<")
359
+ when "I"
360
+ [value].pack("L<")
361
+ else
362
+ raise ArgumentError, "Unsupported scalar AUX type: #{type}"
363
+ end
364
+ end
365
+
366
+ def pack_array_payload(value, type)
367
+ case type
368
+ when "c"
369
+ validate_integer_array_range!(value, -128, 127, type)
370
+ value.pack("c*")
371
+ when "C"
372
+ validate_integer_array_range!(value, 0, 255, type)
373
+ value.pack("C*")
374
+ when "s"
375
+ validate_integer_array_range!(value, -32_768, 32_767, type)
376
+ value.pack("s<*")
377
+ when "S"
378
+ validate_integer_array_range!(value, 0, 65_535, type)
379
+ value.pack("S<*")
380
+ when "i"
381
+ validate_integer_array_range!(value, -2_147_483_648, 2_147_483_647, type)
382
+ value.pack("l<*")
383
+ when "I"
384
+ validate_integer_array_range!(value, 0, 4_294_967_295, type)
385
+ value.pack("L<*")
386
+ when "f"
387
+ value.map(&:to_f).pack("e*")
388
+ else
389
+ raise ArgumentError, "Invalid array type: #{type}"
390
+ end
391
+ end
392
+
393
+ def validate_integer_array_range!(value, min, max, type)
394
+ value.each do |element|
395
+ integer = Integer(element)
396
+ unless integer.between?(min, max)
397
+ raise RangeError, "Array element #{integer} is out of range for AUX array type #{type}"
398
+ end
399
+ end
400
+ end
401
+
216
402
  def get_ruby_aux(aux_ptr, type = nil)
217
- type = type ? type.to_s : aux_ptr.read_string(1)
403
+ actual_type = aux_ptr.read_string(1)
404
+ type = type ? type.to_s : actual_type
405
+ validate_aux_type!(actual_type, type)
218
406
 
219
407
  # A (character), B (general array),
220
408
  # f (real number), H (hexadecimal array),
@@ -246,6 +434,27 @@ module HTS
246
434
  raise NotImplementedError, "type: #{type}"
247
435
  end
248
436
  end
437
+
438
+ def validate_aux_type!(actual_type, requested_type)
439
+ return if aux_type_compatible?(actual_type, requested_type)
440
+
441
+ raise TypeError, "AUX type mismatch: requested #{requested_type.inspect}, actual #{actual_type.inspect}"
442
+ end
443
+
444
+ def aux_type_compatible?(actual_type, requested_type)
445
+ case requested_type
446
+ when "i", "I", "c", "C", "s", "S"
447
+ %w[i I c C s S].include?(actual_type)
448
+ when "f", "d"
449
+ %w[f d].include?(actual_type)
450
+ when "Z", "H"
451
+ %w[Z H].include?(actual_type)
452
+ when "A", "B"
453
+ actual_type == requested_type
454
+ else
455
+ true
456
+ end
457
+ end
249
458
  end
250
459
  end
251
460
  end
data/lib/hts/bam/cigar.rb CHANGED
@@ -15,11 +15,19 @@ module HTS
15
15
  def self.parse(str)
16
16
  c = FFI::MemoryPointer.new(:pointer)
17
17
  m = FFI::MemoryPointer.new(:size_t)
18
- LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
19
- cigar_array = c.read_pointer.read_array_of_uint32(m.read(:size_t))
18
+ c.write_pointer(FFI::Pointer::NULL)
19
+ m.write(:size_t, 0)
20
+ ptr = nil
21
+ n_cigar = LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
22
+ raise "sam_parse_cigar failed: #{n_cigar}" if n_cigar.negative?
23
+
24
+ ptr = c.read_pointer
25
+ cigar_array = ptr.null? ? [] : ptr.read_array_of_uint32(n_cigar)
20
26
  obj = new
21
27
  obj.array = cigar_array
22
28
  obj
29
+ ensure
30
+ LibHTS.hts_free(ptr) if ptr && !ptr.null?
23
31
  end
24
32
 
25
33
  def initialize(record = nil)