htslib 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +23 -1
- data/lib/hts/bam/auxi.rb +228 -19
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +119 -36
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +85 -102
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +34 -2
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +10 -5
- data/lib/hts/version.rb +1 -1
- metadata +4 -4
- data/lib/hts/faidx/sequence.rb +0 -62
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 26b019acbf5b8e110829d2666710f9187923edc57a4af486be8fc4ac690cd0e0
|
|
4
|
+
data.tar.gz: 8edf3b8fc44ce5ce42f0d09f2dcfffdb3dd4b145e72aac28bf13b4daa6d952ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6429da59067ed17e863117a98320304eb9445b82eb0a8ed94c6afe0b5c01cd149797e068cfa6c14cb32f7e60c0cfeedfc9c49d47cf7a5d92191a3818ad8f63a6
|
|
7
|
+
data.tar.gz: 5f6ce5d70616eb23213ebde0a77b1a0efb66664263c839eb3d00bc08d957c77655d9b03c426573e3db7b5a9d459f8ec86077e771b55b51523fbda520ac82cfcb
|
data/TUTORIAL.md
CHANGED
|
@@ -254,6 +254,23 @@ in.close
|
|
|
254
254
|
out.close
|
|
255
255
|
```
|
|
256
256
|
|
|
257
|
+
Update INFO fields
|
|
258
|
+
|
|
259
|
+
```ruby
|
|
260
|
+
bcf = HTS::Bcf.open("in.vcf")
|
|
261
|
+
record = bcf.first
|
|
262
|
+
info = record.info
|
|
263
|
+
|
|
264
|
+
info.update_int("DP", [30])
|
|
265
|
+
# info.update_int64("DP", [2**40]) # Backend-dependent (BCF_HT_LONG)
|
|
266
|
+
info.update_float("AF", [0.25])
|
|
267
|
+
info.update_string("STR", "sample")
|
|
268
|
+
info.update_flag("SOMATIC", true)
|
|
269
|
+
|
|
270
|
+
info["DP"] = 100
|
|
271
|
+
# Out-of-int32 integers require explicit update_int64
|
|
272
|
+
```
|
|
273
|
+
|
|
257
274
|
Writing and modifying auxiliary tags
|
|
258
275
|
|
|
259
276
|
```ruby
|
|
@@ -277,9 +294,14 @@ in_bam.each do |record|
|
|
|
277
294
|
|
|
278
295
|
# Update or add tags using type-specific methods
|
|
279
296
|
aux.update_int("AS", 100) # Integer tag
|
|
297
|
+
aux.update_uint8("XI", 255) # Exact unsigned 8-bit integer tag
|
|
280
298
|
aux.update_float("ZQ", 0.95) # Float tag
|
|
299
|
+
aux.update_double("ZD", 0.125) # Double tag
|
|
300
|
+
aux.update_char("XC", "Y") # Character tag
|
|
301
|
+
aux.update_hex("XH", "DEADBEEF") # Hex string tag
|
|
281
302
|
aux.update_string("RG", "sample1") # String tag
|
|
282
|
-
aux.update_array("BC", [25, 30, 28, 32]) # Array tag
|
|
303
|
+
aux.update_array("BC", [25, 30, 28, 32]) # Array tag (default subtype: i)
|
|
304
|
+
aux.update_array("BQ", [25, 30, 28, 32], type: "C") # Array tag with explicit subtype
|
|
283
305
|
|
|
284
306
|
# Or use the []= operator (auto-detects type)
|
|
285
307
|
aux["NM"] = 2 # Integer
|
data/lib/hts/bam/auxi.rb
CHANGED
|
@@ -79,17 +79,49 @@ module HTS
|
|
|
79
79
|
# @param key [String] tag name (2 characters)
|
|
80
80
|
# @param value [Integer] integer value
|
|
81
81
|
def update_int(key, value)
|
|
82
|
+
validate_tag!(key)
|
|
82
83
|
ret = LibHTS.bam_aux_update_int(@record.struct, key, value.to_i)
|
|
83
84
|
raise "Failed to update integer tag '#{key}': errno #{FFI.errno}" if ret < 0
|
|
84
85
|
|
|
85
86
|
value
|
|
86
87
|
end
|
|
87
88
|
|
|
89
|
+
# Update or add a signed 8-bit integer tag.
|
|
90
|
+
def update_int8(key, value)
|
|
91
|
+
update_exact_integer(key, value, "c", -128, 127)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Update or add an unsigned 8-bit integer tag.
|
|
95
|
+
def update_uint8(key, value)
|
|
96
|
+
update_exact_integer(key, value, "C", 0, 255)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Update or add a signed 16-bit integer tag.
|
|
100
|
+
def update_int16(key, value)
|
|
101
|
+
update_exact_integer(key, value, "s", -32_768, 32_767)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Update or add an unsigned 16-bit integer tag.
|
|
105
|
+
def update_uint16(key, value)
|
|
106
|
+
update_exact_integer(key, value, "S", 0, 65_535)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Update or add a signed 32-bit integer tag.
|
|
110
|
+
def update_int32(key, value)
|
|
111
|
+
update_exact_integer(key, value, "i", -2_147_483_648, 2_147_483_647)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Update or add an unsigned 32-bit integer tag.
|
|
115
|
+
def update_uint32(key, value)
|
|
116
|
+
update_exact_integer(key, value, "I", 0, 4_294_967_295)
|
|
117
|
+
end
|
|
118
|
+
|
|
88
119
|
# Update or add a floating-point tag
|
|
89
120
|
# For compatibility with HTS.cr.
|
|
90
121
|
# @param key [String] tag name (2 characters)
|
|
91
122
|
# @param value [Float] floating-point value
|
|
92
123
|
def update_float(key, value)
|
|
124
|
+
validate_tag!(key)
|
|
93
125
|
ret = LibHTS.bam_aux_update_float(@record.struct, key, value.to_f)
|
|
94
126
|
raise "Failed to update float tag '#{key}': errno #{FFI.errno}" if ret < 0
|
|
95
127
|
|
|
@@ -101,10 +133,43 @@ module HTS
|
|
|
101
133
|
# @param key [String] tag name (2 characters)
|
|
102
134
|
# @param value [String] string value
|
|
103
135
|
def update_string(key, value)
|
|
104
|
-
|
|
136
|
+
validate_tag!(key)
|
|
137
|
+
string = value.to_s
|
|
138
|
+
validate_string_value!(string)
|
|
139
|
+
ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, string)
|
|
105
140
|
raise "Failed to update string tag '#{key}': errno #{FFI.errno}" if ret < 0
|
|
106
141
|
|
|
107
|
-
|
|
142
|
+
string
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Update or add a character tag.
|
|
146
|
+
def update_char(key, value)
|
|
147
|
+
validate_tag!(key)
|
|
148
|
+
|
|
149
|
+
string = value.to_s
|
|
150
|
+
validate_char_value!(string)
|
|
151
|
+
|
|
152
|
+
replace_with_append(key, "A", string.b)
|
|
153
|
+
string
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# Update or add a hexadecimal string tag.
|
|
157
|
+
def update_hex(key, value)
|
|
158
|
+
validate_tag!(key)
|
|
159
|
+
|
|
160
|
+
string = value.to_s
|
|
161
|
+
validate_hex_value!(string)
|
|
162
|
+
|
|
163
|
+
replace_with_append(key, "H", string.b + "\0")
|
|
164
|
+
string
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Update or add a double-precision floating-point tag.
|
|
168
|
+
def update_double(key, value)
|
|
169
|
+
validate_tag!(key)
|
|
170
|
+
|
|
171
|
+
replace_with_append(key, "d", [Float(value)].pack("E"))
|
|
172
|
+
value.to_f
|
|
108
173
|
end
|
|
109
174
|
|
|
110
175
|
# Update or add an array tag
|
|
@@ -113,6 +178,7 @@ module HTS
|
|
|
113
178
|
# @param value [Array] array of integers or floats
|
|
114
179
|
# @param type [String, nil] element type ('c', 'C', 's', 'S', 'i', 'I', 'f'). Auto-detected if nil.
|
|
115
180
|
def update_array(key, value, type: nil)
|
|
181
|
+
validate_tag!(key)
|
|
116
182
|
raise ArgumentError, "Array cannot be empty" if value.empty?
|
|
117
183
|
|
|
118
184
|
# Auto-detect type if not specified
|
|
@@ -127,21 +193,10 @@ module HTS
|
|
|
127
193
|
end
|
|
128
194
|
end
|
|
129
195
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
ptr = FFI::MemoryPointer.new(:int32, value.size)
|
|
135
|
-
ptr.write_array_of_int32(value.map(&:to_i))
|
|
136
|
-
ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
|
|
137
|
-
when "f"
|
|
138
|
-
# Float type
|
|
139
|
-
ptr = FFI::MemoryPointer.new(:float, value.size)
|
|
140
|
-
ptr.write_array_of_float(value.map(&:to_f))
|
|
141
|
-
ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
|
|
142
|
-
else
|
|
143
|
-
raise ArgumentError, "Invalid array type: #{type}"
|
|
144
|
-
end
|
|
196
|
+
payload = pack_array_payload(value, type)
|
|
197
|
+
ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
|
|
198
|
+
ptr.put_bytes(0, payload)
|
|
199
|
+
ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
|
|
145
200
|
|
|
146
201
|
raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if ret < 0
|
|
147
202
|
|
|
@@ -180,7 +235,7 @@ module HTS
|
|
|
180
235
|
get_ruby_aux(aux_ptr)
|
|
181
236
|
end
|
|
182
237
|
|
|
183
|
-
def
|
|
238
|
+
def each_value
|
|
184
239
|
return enum_for(__method__) unless block_given?
|
|
185
240
|
|
|
186
241
|
aux_ptr = first_pointer
|
|
@@ -193,6 +248,27 @@ module HTS
|
|
|
193
248
|
end
|
|
194
249
|
end
|
|
195
250
|
|
|
251
|
+
# Iterate auxiliary tags with their SAM/BAM type.
|
|
252
|
+
#
|
|
253
|
+
# @yieldparam tag [String] 2-byte AUX tag name
|
|
254
|
+
# @yieldparam type [String] AUX type, e.g. "i", "Z", or "B:C"
|
|
255
|
+
# @yieldparam value [Object] Ruby representation of the AUX value
|
|
256
|
+
def each
|
|
257
|
+
return enum_for(__method__) unless block_given?
|
|
258
|
+
|
|
259
|
+
aux_ptr = first_pointer
|
|
260
|
+
return nil if aux_ptr.null?
|
|
261
|
+
|
|
262
|
+
loop do
|
|
263
|
+
tag = FFI::Pointer.new(aux_ptr.address - 2).read_string(2)
|
|
264
|
+
yield tag, aux_type(aux_ptr), get_ruby_aux(aux_ptr)
|
|
265
|
+
aux_ptr = LibHTS.bam_aux_next(@record.struct, aux_ptr)
|
|
266
|
+
break if aux_ptr.null?
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
alias each_pair each
|
|
271
|
+
|
|
196
272
|
def to_h
|
|
197
273
|
h = {}
|
|
198
274
|
aux_ptr = first_pointer
|
|
@@ -213,8 +289,120 @@ module HTS
|
|
|
213
289
|
LibHTS.bam_aux_first(@record.struct)
|
|
214
290
|
end
|
|
215
291
|
|
|
292
|
+
def aux_type(aux_ptr)
|
|
293
|
+
type = aux_ptr.read_string(1)
|
|
294
|
+
return type unless type == "B"
|
|
295
|
+
|
|
296
|
+
"#{type}:#{aux_ptr.read_string(2)[1]}"
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
def validate_tag!(key)
|
|
300
|
+
return if key.is_a?(String) && key.bytesize == 2 && key.ascii_only?
|
|
301
|
+
|
|
302
|
+
raise ArgumentError, "AUX tag must be a 2-byte ASCII String"
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
def validate_string_value!(string)
|
|
306
|
+
return unless string.include?("\0")
|
|
307
|
+
|
|
308
|
+
raise ArgumentError, "String AUX tags must not contain NUL bytes"
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def validate_char_value!(string)
|
|
312
|
+
return if string.bytesize == 1 && string.ascii_only? && /\A[!-~]\z/.match?(string)
|
|
313
|
+
|
|
314
|
+
raise ArgumentError, "Character AUX tags must be a single printable ASCII byte"
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def validate_hex_value!(string)
|
|
318
|
+
raise ArgumentError, "Hex AUX tags must contain an even number of characters" if string.bytesize.odd?
|
|
319
|
+
|
|
320
|
+
return if string.ascii_only? && /\A[0-9A-Fa-f]*\z/.match?(string)
|
|
321
|
+
|
|
322
|
+
raise ArgumentError,
|
|
323
|
+
"Hex AUX tags must contain only ASCII hexadecimal characters"
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
def update_exact_integer(key, value, type, min, max)
|
|
327
|
+
validate_tag!(key)
|
|
328
|
+
|
|
329
|
+
integer = Integer(value)
|
|
330
|
+
raise RangeError, "Value #{integer} is out of range for AUX type #{type}" unless integer.between?(min, max)
|
|
331
|
+
|
|
332
|
+
replace_with_append(key, type, pack_scalar_payload(integer, type))
|
|
333
|
+
integer
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
def replace_with_append(key, type, payload)
|
|
337
|
+
delete(key) if key?(key)
|
|
338
|
+
|
|
339
|
+
ptr = FFI::MemoryPointer.new(:uint8, payload.bytesize)
|
|
340
|
+
ptr.put_bytes(0, payload)
|
|
341
|
+
ret = LibHTS.bam_aux_append(@record.struct, key, type.ord, payload.bytesize, ptr)
|
|
342
|
+
raise "Failed to update #{type} tag '#{key}': errno #{FFI.errno}" if ret < 0
|
|
343
|
+
|
|
344
|
+
true
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
def pack_scalar_payload(value, type)
|
|
348
|
+
case type
|
|
349
|
+
when "c"
|
|
350
|
+
[value].pack("c")
|
|
351
|
+
when "C"
|
|
352
|
+
[value].pack("C")
|
|
353
|
+
when "s"
|
|
354
|
+
[value].pack("s<")
|
|
355
|
+
when "S"
|
|
356
|
+
[value].pack("S<")
|
|
357
|
+
when "i"
|
|
358
|
+
[value].pack("l<")
|
|
359
|
+
when "I"
|
|
360
|
+
[value].pack("L<")
|
|
361
|
+
else
|
|
362
|
+
raise ArgumentError, "Unsupported scalar AUX type: #{type}"
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
def pack_array_payload(value, type)
|
|
367
|
+
case type
|
|
368
|
+
when "c"
|
|
369
|
+
validate_integer_array_range!(value, -128, 127, type)
|
|
370
|
+
value.pack("c*")
|
|
371
|
+
when "C"
|
|
372
|
+
validate_integer_array_range!(value, 0, 255, type)
|
|
373
|
+
value.pack("C*")
|
|
374
|
+
when "s"
|
|
375
|
+
validate_integer_array_range!(value, -32_768, 32_767, type)
|
|
376
|
+
value.pack("s<*")
|
|
377
|
+
when "S"
|
|
378
|
+
validate_integer_array_range!(value, 0, 65_535, type)
|
|
379
|
+
value.pack("S<*")
|
|
380
|
+
when "i"
|
|
381
|
+
validate_integer_array_range!(value, -2_147_483_648, 2_147_483_647, type)
|
|
382
|
+
value.pack("l<*")
|
|
383
|
+
when "I"
|
|
384
|
+
validate_integer_array_range!(value, 0, 4_294_967_295, type)
|
|
385
|
+
value.pack("L<*")
|
|
386
|
+
when "f"
|
|
387
|
+
value.map(&:to_f).pack("e*")
|
|
388
|
+
else
|
|
389
|
+
raise ArgumentError, "Invalid array type: #{type}"
|
|
390
|
+
end
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
def validate_integer_array_range!(value, min, max, type)
|
|
394
|
+
value.each do |element|
|
|
395
|
+
integer = Integer(element)
|
|
396
|
+
unless integer.between?(min, max)
|
|
397
|
+
raise RangeError, "Array element #{integer} is out of range for AUX array type #{type}"
|
|
398
|
+
end
|
|
399
|
+
end
|
|
400
|
+
end
|
|
401
|
+
|
|
216
402
|
def get_ruby_aux(aux_ptr, type = nil)
|
|
217
|
-
|
|
403
|
+
actual_type = aux_ptr.read_string(1)
|
|
404
|
+
type = type ? type.to_s : actual_type
|
|
405
|
+
validate_aux_type!(actual_type, type)
|
|
218
406
|
|
|
219
407
|
# A (character), B (general array),
|
|
220
408
|
# f (real number), H (hexadecimal array),
|
|
@@ -246,6 +434,27 @@ module HTS
|
|
|
246
434
|
raise NotImplementedError, "type: #{type}"
|
|
247
435
|
end
|
|
248
436
|
end
|
|
437
|
+
|
|
438
|
+
def validate_aux_type!(actual_type, requested_type)
|
|
439
|
+
return if aux_type_compatible?(actual_type, requested_type)
|
|
440
|
+
|
|
441
|
+
raise TypeError, "AUX type mismatch: requested #{requested_type.inspect}, actual #{actual_type.inspect}"
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
def aux_type_compatible?(actual_type, requested_type)
|
|
445
|
+
case requested_type
|
|
446
|
+
when "i", "I", "c", "C", "s", "S"
|
|
447
|
+
%w[i I c C s S].include?(actual_type)
|
|
448
|
+
when "f", "d"
|
|
449
|
+
%w[f d].include?(actual_type)
|
|
450
|
+
when "Z", "H"
|
|
451
|
+
%w[Z H].include?(actual_type)
|
|
452
|
+
when "A", "B"
|
|
453
|
+
actual_type == requested_type
|
|
454
|
+
else
|
|
455
|
+
true
|
|
456
|
+
end
|
|
457
|
+
end
|
|
249
458
|
end
|
|
250
459
|
end
|
|
251
460
|
end
|
data/lib/hts/bam/cigar.rb
CHANGED
|
@@ -15,11 +15,19 @@ module HTS
|
|
|
15
15
|
def self.parse(str)
|
|
16
16
|
c = FFI::MemoryPointer.new(:pointer)
|
|
17
17
|
m = FFI::MemoryPointer.new(:size_t)
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
c.write_pointer(FFI::Pointer::NULL)
|
|
19
|
+
m.write(:size_t, 0)
|
|
20
|
+
ptr = nil
|
|
21
|
+
n_cigar = LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
|
|
22
|
+
raise "sam_parse_cigar failed: #{n_cigar}" if n_cigar.negative?
|
|
23
|
+
|
|
24
|
+
ptr = c.read_pointer
|
|
25
|
+
cigar_array = ptr.null? ? [] : ptr.read_array_of_uint32(n_cigar)
|
|
20
26
|
obj = new
|
|
21
27
|
obj.array = cigar_array
|
|
22
28
|
obj
|
|
29
|
+
ensure
|
|
30
|
+
LibHTS.hts_free(ptr) if ptr && !ptr.null?
|
|
23
31
|
end
|
|
24
32
|
|
|
25
33
|
def initialize(record = nil)
|