htslib 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1bf158506931c62ffae1a524158de9fbb451796a68a7586cf788203f67a8cc4
4
- data.tar.gz: d3289551dac8783cfa23f1f8d44e1b4be44b6dab3d6369f816491ceea653188f
3
+ metadata.gz: 26b019acbf5b8e110829d2666710f9187923edc57a4af486be8fc4ac690cd0e0
4
+ data.tar.gz: 8edf3b8fc44ce5ce42f0d09f2dcfffdb3dd4b145e72aac28bf13b4daa6d952ad
5
5
  SHA512:
6
- metadata.gz: d1c316a599c2dc08e6589f980e9dd4ae6d8d6bababbfef424f35e5c25453b667bdc48570578e88a9eba142553fffb25b49ae4de54a061f99f7cbf286988ec618
7
- data.tar.gz: 8529c6a02c354419dd722abc0bd6f27ec514ffe980f4b0d7014914de11551a45c4e96b803bc77255c3b423129a8743a3c39556d9d3d44ec6c6692556485379cf
6
+ metadata.gz: 6429da59067ed17e863117a98320304eb9445b82eb0a8ed94c6afe0b5c01cd149797e068cfa6c14cb32f7e60c0cfeedfc9c49d47cf7a5d92191a3818ad8f63a6
7
+ data.tar.gz: 5f6ce5d70616eb23213ebde0a77b1a0efb66664263c839eb3d00bc08d957c77655d9b03c426573e3db7b5a9d459f8ec86077e771b55b51523fbda520ac82cfcb
data/TUTORIAL.md CHANGED
@@ -254,6 +254,73 @@ in.close
254
254
  out.close
255
255
  ```
256
256
 
257
+ Update INFO fields
258
+
259
+ ```ruby
260
+ bcf = HTS::Bcf.open("in.vcf")
261
+ record = bcf.first
262
+ info = record.info
263
+
264
+ info.update_int("DP", [30])
265
+ # info.update_int64("DP", [2**40]) # Backend-dependent (BCF_HT_LONG)
266
+ info.update_float("AF", [0.25])
267
+ info.update_string("STR", "sample")
268
+ info.update_flag("SOMATIC", true)
269
+
270
+ info["DP"] = 100
271
+ # Out-of-int32 integers require explicit update_int64
272
+ ```
273
+
274
+ Writing and modifying auxiliary tags
275
+
276
+ ```ruby
277
+ # Reading auxiliary tags
278
+ bam = HTS::Bam.open("input.bam")
279
+ record = bam.first
280
+ aux = record.aux
281
+
282
+ # Read tags
283
+ alignment_score = aux["AS"] # Auto-detect type
284
+ mc_cigar = aux.get_string("MC") # Type-specific getter
285
+ edit_distance = aux.get_int("NM") # Type-specific getter
286
+
287
+ # Writing/updating auxiliary tags
288
+ in_bam = HTS::Bam.open("input.bam")
289
+ out_bam = HTS::Bam.open("output.bam", "wb")
290
+ out_bam.write_header(in_bam.header)
291
+
292
+ in_bam.each do |record|
293
+ aux = record.aux
294
+
295
+ # Update or add tags using type-specific methods
296
+ aux.update_int("AS", 100) # Integer tag
297
+ aux.update_uint8("XI", 255) # Exact unsigned 8-bit integer tag
298
+ aux.update_float("ZQ", 0.95) # Float tag
299
+ aux.update_double("ZD", 0.125) # Double tag
300
+ aux.update_char("XC", "Y") # Character tag
301
+ aux.update_hex("XH", "DEADBEEF") # Hex string tag
302
+ aux.update_string("RG", "sample1") # String tag
303
+ aux.update_array("BC", [25, 30, 28, 32]) # Array tag (default subtype: i)
304
+ aux.update_array("BQ", [25, 30, 28, 32], type: "C") # Array tag with explicit subtype
305
+
306
+ # Or use the []= operator (auto-detects type)
307
+ aux["NM"] = 2 # Integer
308
+ aux["ZS"] = "modified" # String
309
+ aux["ZF"] = 3.14 # Float
310
+ aux["ZA"] = [1, 2, 3, 4] # Array
311
+
312
+ # Check if tag exists
313
+ if aux.key?("XS")
314
+ aux.delete("XS") # Delete tag
315
+ end
316
+
317
+ out_bam.write(record)
318
+ end
319
+
320
+ in_bam.close
321
+ out_bam.close
322
+ ```
323
+
257
324
  Create index
258
325
 
259
326
  ```ruby
data/lib/hts/bam/auxi.rb CHANGED
@@ -55,6 +55,179 @@ module HTS
55
55
  get(key)
56
56
  end
57
57
 
58
# Assign an auxiliary tag, choosing the writer from the Ruby class of +value+.
# For compatibility with HTS.cr.
#
# Integer goes to update_int, Float to update_float, String to
# update_string and Array to update_array.
#
# @param key [String] tag name (2 characters)
# @param value [Integer, Float, String, Array] tag value
# @raise [ArgumentError] if +value+ is none of the supported classes
def []=(key, value)
  writers = {
    Integer => :update_int,
    Float => :update_float,
    String => :update_string,
    Array => :update_array
  }
  # Hash order is insertion order, so classes are tried in the order listed.
  _klass, writer = writers.find { |klass, _name| value.is_a?(klass) }
  raise ArgumentError, "Unsupported type: #{value.class}" unless writer

  public_send(writer, key, value)
end
76
+
77
# Update or add an integer tag.
# For compatibility with HTS.cr.
#
# The value is converted with Kernel#Integer rather than #to_i, so input
# that is not integer-like (nil, "abc", ...) raises instead of being
# silently written as 0. This matches the conversion used by
# update_exact_integer.
#
# @param key [String] tag name (2 characters)
# @param value [Integer] integer value
# @return [Object] +value+ exactly as it was passed in
# @raise [ArgumentError] if validate_tag! rejects +key+ or +value+ is a non-numeric String
# @raise [TypeError] if +value+ cannot be converted to an Integer
# @raise [RuntimeError] if bam_aux_update_int returns a negative status
def update_int(key, value)
  validate_tag!(key)
  integer = Integer(value)
  ret = LibHTS.bam_aux_update_int(@record.struct, key, integer)
  raise "Failed to update integer tag '#{key}': errno #{FFI.errno}" if ret < 0

  value
end
88
+
89
# Update or add a signed 8-bit integer tag (AUX type "c").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds -128..127
# @return whatever update_exact_integer returns
def update_int8(key, value)
  lower = -(2**7)
  upper = (2**7) - 1
  update_exact_integer(key, value, "c", lower, upper)
end
93
+
94
# Update or add an unsigned 8-bit integer tag (AUX type "C").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds 0..255
# @return whatever update_exact_integer returns
def update_uint8(key, value)
  upper = (2**8) - 1
  update_exact_integer(key, value, "C", 0, upper)
end
98
+
99
# Update or add a signed 16-bit integer tag (AUX type "s").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds -32768..32767
# @return whatever update_exact_integer returns
def update_int16(key, value)
  lower = -(2**15)
  upper = (2**15) - 1
  update_exact_integer(key, value, "s", lower, upper)
end
103
+
104
# Update or add an unsigned 16-bit integer tag (AUX type "S").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds 0..65535
# @return whatever update_exact_integer returns
def update_uint16(key, value)
  upper = (2**16) - 1
  update_exact_integer(key, value, "S", 0, upper)
end
108
+
109
# Update or add a signed 32-bit integer tag (AUX type "i").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds -2147483648..2147483647
# @return whatever update_exact_integer returns
def update_int32(key, value)
  lower = -(2**31)
  upper = (2**31) - 1
  update_exact_integer(key, value, "i", lower, upper)
end
113
+
114
# Update or add an unsigned 32-bit integer tag (AUX type "I").
#
# @param key [String] tag name
# @param value [Integer] value, passed to update_exact_integer with bounds 0..4294967295
# @return whatever update_exact_integer returns
def update_uint32(key, value)
  upper = (2**32) - 1
  update_exact_integer(key, value, "I", 0, upper)
end
118
+
119
# Update or add a floating-point tag.
# For compatibility with HTS.cr.
#
# The value is converted with Kernel#Float rather than #to_f, so input that
# is not numeric (nil, "abc", ...) raises instead of being silently written
# as 0.0. update_double converts the same way.
#
# @param key [String] tag name (2 characters)
# @param value [Float] floating-point value
# @return [Object] +value+ exactly as it was passed in
# @raise [ArgumentError] if validate_tag! rejects +key+ or +value+ is a non-numeric String
# @raise [TypeError] if +value+ cannot be converted to a Float
# @raise [RuntimeError] if bam_aux_update_float returns a negative status
def update_float(key, value)
  validate_tag!(key)
  float = Float(value)
  ret = LibHTS.bam_aux_update_float(@record.struct, key, float)
  raise "Failed to update float tag '#{key}': errno #{FFI.errno}" if ret < 0

  value
end
130
+
131
# Update or add a string tag.
# For compatibility with HTS.cr.
#
# @param key [String] tag name (2 characters)
# @param value [String] string value; converted with #to_s before writing
# @return [String] the converted string that was written
# @raise [ArgumentError] if the key or the converted string fails validation
# @raise [RuntimeError] if bam_aux_update_str returns a negative status
def update_string(key, value)
  validate_tag!(key)

  text = value.to_s
  validate_string_value!(text)

  # NOTE(review): the -1 length presumably lets htslib measure the
  # NUL-terminated string itself; confirm against the htslib docs.
  status = LibHTS.bam_aux_update_str(@record.struct, key, -1, text)
  raise "Failed to update string tag '#{key}': errno #{FFI.errno}" if status.negative?

  text
end
144
+
145
# Update or add a character tag (AUX type "A").
#
# @param key [String] tag name
# @param value [#to_s] the character; its String form must pass validate_char_value!
# @return [String] the String form of +value+
# @raise [ArgumentError] if the key or the character fails validation
def update_char(key, value)
  validate_tag!(key)

  char = value.to_s
  validate_char_value!(char)

  # The payload is handed over as a binary copy of the single byte.
  payload = char.b
  replace_with_append(key, "A", payload)
  char
end
155
+
156
# Update or add a hexadecimal string tag (AUX type "H").
#
# @param key [String] tag name
# @param value [#to_s] hexadecimal digits; the String form must pass validate_hex_value!
# @return [String] the String form of +value+
# @raise [ArgumentError] if the key or the hex string fails validation
def update_hex(key, value)
  validate_tag!(key)

  hex = value.to_s
  validate_hex_value!(hex)

  # Payload is the binary copy of the digits followed by a terminating NUL byte.
  payload = hex.b
  payload << "\0"
  replace_with_append(key, "H", payload)
  hex
end
166
+
167
# Update or add a double-precision floating-point tag (AUX type "d").
#
# @param key [String] tag name
# @param value [Numeric, String] anything Kernel#Float accepts
# @return [Float] +value+ converted with #to_f
# @raise [ArgumentError] if the key fails validation or +value+ is a non-numeric String
# @raise [TypeError] if +value+ cannot be converted by Kernel#Float
def update_double(key, value)
  validate_tag!(key)

  # "E" packs a double-precision float in little-endian byte order.
  encoded = [Float(value)].pack("E")
  replace_with_append(key, "d", encoded)
  value.to_f
end
174
+
175
# Update or add an array tag.
# For compatibility with HTS.cr.
#
# When +type+ is nil it is chosen from the elements: "i" if every element is
# an Integer, "f" if every element is an Integer or Float.
#
# @param key [String] tag name (2 characters)
# @param value [Array] array of integers or floats
# @param type [String, nil] element type ('c', 'C', 's', 'S', 'i', 'I', 'f'). Auto-detected if nil.
# @return [Array] +value+ as passed in
# @raise [ArgumentError] if the key is invalid, the array is empty, the
#   elements are not all numeric during auto-detection, or pack_array_payload
#   rejects the type
# @raise [RuntimeError] if bam_aux_update_array returns a negative status
def update_array(key, value, type: nil)
  validate_tag!(key)
  raise ArgumentError, "Array cannot be empty" if value.empty?

  if type.nil?
    integers_only = value.all? { |element| element.is_a?(Integer) }
    numeric_only = integers_only || value.all? { |element| element.is_a?(Float) || element.is_a?(Integer) }
    raise ArgumentError, "Array must contain only integers or floats" unless numeric_only

    # All-integer input defaults to signed 32-bit; anything with a Float becomes "f".
    type = integers_only ? "i" : "f"
  end

  bytes = pack_array_payload(value, type)
  buffer = FFI::MemoryPointer.new(:uint8, bytes.bytesize)
  buffer.put_bytes(0, bytes)

  status = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, buffer)
  raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if status.negative?

  value
end
205
+
206
# Delete an auxiliary tag.
# For compatibility with HTS.cr.
#
# A key that is not a 2-byte String cannot name a tag, so it is reported as
# "not found" without touching the record.
# NOTE(review): htslib's bam_aux_get is declared as taking a 2-character
# tag, so a longer key would presumably be matched on its first two bytes
# and delete a different tag; confirm against sam.h.
#
# @param key [String] tag name (2 characters)
# @return [Boolean] true if tag was deleted, false if tag was not found
# @raise [RuntimeError] if bam_aux_del returns a negative status
def delete(key)
  return false unless key.is_a?(String) && key.bytesize == 2

  aux_ptr = LibHTS.bam_aux_get(@record.struct, key)
  return false if aux_ptr.null?

  ret = LibHTS.bam_aux_del(@record.struct, aux_ptr)
  raise "Failed to delete tag '#{key}': errno #{FFI.errno}" if ret < 0

  true
end
219
+
220
# Check if a tag exists.
# For compatibility with HTS.cr.
#
# A key that is not a 2-byte String cannot name a tag, so the answer is
# false without consulting the record.
# NOTE(review): htslib's bam_aux_get is declared as taking a 2-character
# tag, so a longer key would presumably be matched on its first two bytes;
# confirm against sam.h.
#
# @param key [String] tag name (2 characters)
# @return [Boolean] true if tag exists
def key?(key)
  return false unless key.is_a?(String) && key.bytesize == 2

  aux_ptr = LibHTS.bam_aux_get(@record.struct, key)
  !aux_ptr.null?
end
228
+
229
+ alias include? key?
230
+
58
231
  def first
59
232
  aux_ptr = first_pointer
60
233
  return nil if aux_ptr.null?
@@ -62,7 +235,7 @@ module HTS
62
235
  get_ruby_aux(aux_ptr)
63
236
  end
64
237
 
65
- def each
238
+ def each_value
66
239
  return enum_for(__method__) unless block_given?
67
240
 
68
241
  aux_ptr = first_pointer
@@ -75,6 +248,27 @@ module HTS
75
248
  end
76
249
  end
77
250
 
251
# Walk every auxiliary field of the record, yielding its name, type and value.
# Returns an Enumerator when called without a block.
#
# @yieldparam tag [String] 2-byte AUX tag name
# @yieldparam type [String] AUX type, e.g. "i", "Z", or "B:C"
# @yieldparam value [Object] Ruby representation of the AUX value
# @return [Enumerator, nil] an Enumerator without a block, otherwise nil
def each
  return enum_for(__method__) unless block_given?

  cursor = first_pointer
  loop do
    break if cursor.null?

    # The tag name is read from the two bytes immediately before the field pointer.
    name = FFI::Pointer.new(cursor.address - 2).read_string(2)
    yield name, aux_type(cursor), get_ruby_aux(cursor)
    cursor = LibHTS.bam_aux_next(@record.struct, cursor)
  end
end
269
+
270
+ alias each_pair each
271
+
78
272
  def to_h
79
273
  h = {}
80
274
  aux_ptr = first_pointer
@@ -95,8 +289,120 @@ module HTS
95
289
  LibHTS.bam_aux_first(@record.struct)
96
290
  end
97
291
 
292
# Describe the AUX type stored at +aux_ptr+.
#
# The first byte at the pointer is the type code. For "B" (array) fields the
# second byte is appended after a colon, giving strings such as "B:C".
#
# @param aux_ptr [FFI::Pointer] pointer to an AUX field
# @return [String] the type code, or "B:<subtype>" for arrays
def aux_type(aux_ptr)
  code = aux_ptr.read_string(1)
  if code == "B"
    subtype = aux_ptr.read_string(2)[1]
    "#{code}:#{subtype}"
  else
    code
  end
end
298
+
299
# Ensure +key+ is a well-formed AUX tag name before it is handed to htslib.
#
# Only names of the form [A-Za-z][A-Za-z0-9] are accepted. Checking merely
# for "two ASCII bytes" also let through names such as "A\t", "  " or
# "A\0", which cannot be represented in a tab-delimited SAM text record.
#
# @param key [Object] candidate tag name
# @return [nil] when the key is valid
# @raise [ArgumentError] when the key is not a String of that form
def validate_tag!(key)
  # The size and ASCII checks run first so the regexp is only ever applied
  # to a 2-byte, ASCII-only String.
  well_formed = key.is_a?(String) && key.bytesize == 2 && key.ascii_only? &&
                /\A[A-Za-z][A-Za-z0-9]\z/.match?(key)
  return if well_formed

  raise ArgumentError, "AUX tag must be a 2-byte ASCII String matching [A-Za-z][A-Za-z0-9]"
end
304
+
305
# Reject string values that contain a NUL byte.
#
# @param string [String] value about to be written as a string AUX tag
# @return [nil] when the string contains no NUL byte
# @raise [ArgumentError] when a NUL byte is present
def validate_string_value!(string)
  raise ArgumentError, "String AUX tags must not contain NUL bytes" if string.include?("\0")
end
310
+
311
# Require a character value to be exactly one printable ASCII byte ("!" through "~").
#
# @param string [String] value about to be written as a character AUX tag
# @return [nil] when the value is acceptable
# @raise [ArgumentError] otherwise
def validate_char_value!(string)
  # 0x21 is "!" and 0x7E is "~"; the size and ASCII checks guard the byte lookup.
  printable = string.bytesize == 1 && string.ascii_only? && string.getbyte(0).between?(0x21, 0x7E)
  raise ArgumentError, "Character AUX tags must be a single printable ASCII byte" unless printable
end
316
+
317
# Check that a hex AUX value has an even number of bytes and consists solely
# of ASCII hexadecimal digits (either case). The empty string is accepted.
#
# @param string [String] value about to be written as a hex AUX tag
# @return [nil] when the value is acceptable
# @raise [ArgumentError] for an odd length or a non-hexadecimal character
def validate_hex_value!(string)
  if string.bytesize.odd?
    raise ArgumentError, "Hex AUX tags must contain an even number of characters"
  end

  # Count characters outside the hex digit set; only reached for ASCII-only input.
  hex_only = string.ascii_only? && string.count("^0-9A-Fa-f").zero?
  raise ArgumentError, "Hex AUX tags must contain only ASCII hexadecimal characters" unless hex_only
end
325
+
326
# Write an integer tag with one specific AUX integer type.
#
# @param key [String] tag name
# @param value [Object] anything Kernel#Integer accepts
# @param type [String] AUX integer type code handed to pack_scalar_payload
# @param min [Integer] smallest allowed value (inclusive)
# @param max [Integer] largest allowed value (inclusive)
# @return [Integer] the converted integer that was written
# @raise [ArgumentError] if the key fails validate_tag!
# @raise [RangeError] if the converted integer lies outside min..max
def update_exact_integer(key, value, type, min, max)
  validate_tag!(key)

  number = Integer(value)
  if number < min || number > max
    raise RangeError, "Value #{number} is out of range for AUX type #{type}"
  end

  encoded = pack_scalar_payload(number, type)
  replace_with_append(key, type, encoded)
  number
end
335
+
336
# Replace a tag by deleting any existing field with that name and appending
# a new one built from the raw payload bytes.
#
# @param key [String] tag name
# @param type [String] AUX type code; its first character's ordinal is passed to htslib
# @param payload [String] encoded bytes of the value
# @return [true] on success
# @raise [RuntimeError] if bam_aux_append returns a negative status
def replace_with_append(key, type, payload)
  delete(key) if key?(key)

  byte_count = payload.bytesize
  buffer = FFI::MemoryPointer.new(:uint8, byte_count)
  buffer.put_bytes(0, payload)

  status = LibHTS.bam_aux_append(@record.struct, key, type.ord, byte_count, buffer)
  raise "Failed to update #{type} tag '#{key}': errno #{FFI.errno}" if status.negative?

  true
end
346
+
347
# Encode a single integer as the bytes of the given AUX integer type.
# Multi-byte types are packed little-endian.
#
# @param value [Integer] integer to encode
# @param type [String] one of "c", "C", "s", "S", "i", "I"
# @return [String] packed bytes
# @raise [ArgumentError] for any other type
def pack_scalar_payload(value, type)
  # AUX type code => Array#pack directive.
  directives = {
    "c" => "c",
    "C" => "C",
    "s" => "s<",
    "S" => "S<",
    "i" => "l<",
    "I" => "L<"
  }
  directive = directives.fetch(type) do
    raise ArgumentError, "Unsupported scalar AUX type: #{type}"
  end
  [value].pack(directive)
end
365
+
366
# Encode an array as the raw element bytes of the given AUX array subtype.
# Multi-byte element types are packed little-endian.
#
# Integer subtypes pack the values returned by validate_integer_array_range!,
# i.e. the same integers that were range-checked. Packing the raw input
# instead let an element such as "5" pass validation and then fail inside
# Array#pack. The "f" subtype converts with Kernel#Float so non-numeric
# elements raise rather than being written as 0.0.
#
# @param value [Array] elements to encode
# @param type [String] one of "c", "C", "s", "S", "i", "I", "f"
# @return [String] packed bytes
# @raise [ArgumentError] for an unknown type or a non-numeric element
# @raise [TypeError] for an element that cannot be converted to a number
# @raise [RangeError] for an integer element outside the subtype's range
def pack_array_payload(value, type)
  case type
  when "c"
    validate_integer_array_range!(value, -128, 127, type).pack("c*")
  when "C"
    validate_integer_array_range!(value, 0, 255, type).pack("C*")
  when "s"
    validate_integer_array_range!(value, -32_768, 32_767, type).pack("s<*")
  when "S"
    validate_integer_array_range!(value, 0, 65_535, type).pack("S<*")
  when "i"
    validate_integer_array_range!(value, -2_147_483_648, 2_147_483_647, type).pack("l<*")
  when "I"
    validate_integer_array_range!(value, 0, 4_294_967_295, type).pack("L<*")
  when "f"
    value.map { |element| Float(element) }.pack("e*")
  else
    raise ArgumentError, "Invalid array type: #{type}"
  end
end

# Convert every element with Kernel#Integer and check it against min..max.
#
# @param value [Array] elements to check
# @param min [Integer] smallest allowed value (inclusive)
# @param max [Integer] largest allowed value (inclusive)
# @param type [String] AUX array subtype, used only in the error message
# @return [Array<Integer>] the converted integers, in input order
# @raise [RangeError] if any converted element lies outside min..max
def validate_integer_array_range!(value, min, max, type)
  value.map do |element|
    integer = Integer(element)
    unless integer.between?(min, max)
      raise RangeError, "Array element #{integer} is out of range for AUX array type #{type}"
    end

    integer
  end
end
401
+
98
402
  def get_ruby_aux(aux_ptr, type = nil)
99
- type = type ? type.to_s : aux_ptr.read_string(1)
403
+ actual_type = aux_ptr.read_string(1)
404
+ type = type ? type.to_s : actual_type
405
+ validate_aux_type!(actual_type, type)
100
406
 
101
407
  # A (character), B (general array),
102
408
  # f (real number), H (hexadecimal array),
@@ -128,6 +434,27 @@ module HTS
128
434
  raise NotImplementedError, "type: #{type}"
129
435
  end
130
436
  end
437
+
438
# Raise unless the stored AUX type may be read as the requested type.
#
# @param actual_type [String] type code read from the record
# @param requested_type [String] type code the caller asked for
# @return [nil] when aux_type_compatible? accepts the pair
# @raise [TypeError] when it does not
def validate_aux_type!(actual_type, requested_type)
  unless aux_type_compatible?(actual_type, requested_type)
    raise TypeError, "AUX type mismatch: requested #{requested_type.inspect}, actual #{actual_type.inspect}"
  end
end
443
+
444
# Decide whether a field stored as +actual_type+ may be read as +requested_type+.
#
# Integer codes (i I c C s S) are interchangeable with each other, as are
# the float codes (f d) and the string codes (Z H). "A" and "B" must match
# exactly. Any other requested type is treated as compatible.
#
# @param actual_type [String] type code read from the record
# @param requested_type [String] type code the caller asked for
# @return [Boolean]
def aux_type_compatible?(actual_type, requested_type)
  families = [%w[i I c C s S], %w[f d], %w[Z H]]
  family = families.find { |codes| codes.include?(requested_type) }
  return family.include?(actual_type) if family
  return actual_type == requested_type if %w[A B].include?(requested_type)

  true
end
131
458
  end
132
459
  end
133
460
  end
data/lib/hts/bam/cigar.rb CHANGED
@@ -15,11 +15,19 @@ module HTS
15
15
  def self.parse(str)
16
16
  c = FFI::MemoryPointer.new(:pointer)
17
17
  m = FFI::MemoryPointer.new(:size_t)
18
- LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
19
- cigar_array = c.read_pointer.read_array_of_uint32(m.read(:size_t))
18
+ c.write_pointer(FFI::Pointer::NULL)
19
+ m.write(:size_t, 0)
20
+ ptr = nil
21
+ n_cigar = LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
22
+ raise "sam_parse_cigar failed: #{n_cigar}" if n_cigar.negative?
23
+
24
+ ptr = c.read_pointer
25
+ cigar_array = ptr.null? ? [] : ptr.read_array_of_uint32(n_cigar)
20
26
  obj = new
21
27
  obj.array = cigar_array
22
28
  obj
29
+ ensure
30
+ LibHTS.hts_free(ptr) if ptr && !ptr.null?
23
31
  end
24
32
 
25
33
  def initialize(record = nil)