htslib 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +67 -0
- data/lib/hts/bam/auxi.rb +329 -2
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +269 -28
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +110 -73
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +41 -3
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +29 -2
- data/lib/hts/version.rb +1 -1
- metadata +3 -3
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/bcf/info.rb
CHANGED
|
@@ -6,7 +6,6 @@ module HTS
|
|
|
6
6
|
class Info
|
|
7
7
|
def initialize(record)
|
|
8
8
|
@record = record
|
|
9
|
-
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
|
10
9
|
end
|
|
11
10
|
|
|
12
11
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
|
@@ -16,37 +15,55 @@ module HTS
|
|
|
16
15
|
# I think they are better than `fetch_int`` and `fetch_float`.
|
|
17
16
|
def get(key, type = nil)
|
|
18
17
|
n = FFI::MemoryPointer.new(:int)
|
|
19
|
-
p1 =
|
|
18
|
+
p1 = FFI::MemoryPointer.new(:pointer)
|
|
19
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
20
20
|
h = @record.header.struct
|
|
21
21
|
r = @record.struct
|
|
22
22
|
|
|
23
|
-
info_values = proc do |typ|
|
|
23
|
+
info_values = proc do |typ, reader|
|
|
24
24
|
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
|
|
25
25
|
return nil if ret < 0 # return from method.
|
|
26
26
|
|
|
27
|
-
p1.read_pointer
|
|
27
|
+
dst = p1.read_pointer
|
|
28
|
+
begin
|
|
29
|
+
reader.call(dst, n.read_int)
|
|
30
|
+
ensure
|
|
31
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
32
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
actual_type = ht_type_to_sym(get_info_type(key))
|
|
37
|
+
if type && actual_type && !info_type_compatible?(actual_type, type.to_sym)
|
|
38
|
+
raise InfoTypeError, "Tag #{key} is not #{type_label(type)} INFO field"
|
|
28
39
|
end
|
|
29
40
|
|
|
30
|
-
type ||=
|
|
41
|
+
type ||= actual_type
|
|
42
|
+
return nil if actual_type && !key?(key)
|
|
31
43
|
|
|
32
44
|
case type&.to_sym
|
|
33
45
|
when :int, :int32
|
|
34
|
-
info_values.call(LibHTS::BCF_HT_INT)
|
|
35
|
-
|
|
46
|
+
info_values.call(LibHTS::BCF_HT_INT, ->(dst, len) { dst.read_array_of_int32(len) })
|
|
47
|
+
when :int64, :long
|
|
48
|
+
info_values.call(LibHTS::BCF_HT_LONG, ->(dst, len) { dst.read_array_of_int64(len) })
|
|
36
49
|
when :float, :real
|
|
37
|
-
info_values.call(LibHTS::BCF_HT_REAL)
|
|
38
|
-
.read_array_of_float(n.read_int)
|
|
50
|
+
info_values.call(LibHTS::BCF_HT_REAL, ->(dst, len) { dst.read_array_of_float(len) })
|
|
39
51
|
when :flag, :bool
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
begin
|
|
53
|
+
case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
|
|
54
|
+
when 1 then true
|
|
55
|
+
when 0 then false
|
|
56
|
+
when -1 then nil
|
|
57
|
+
else
|
|
58
|
+
raise InfoReadError, "Unknown return value from bcf_get_info_flag: #{ret}"
|
|
59
|
+
end
|
|
60
|
+
ensure
|
|
61
|
+
dst = p1.read_pointer
|
|
62
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
63
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
46
64
|
end
|
|
47
65
|
when :string, :str
|
|
48
|
-
info_values.call(LibHTS::BCF_HT_STR)
|
|
49
|
-
.read_string
|
|
66
|
+
info_values.call(LibHTS::BCF_HT_STR, ->(dst, _len) { dst.read_string })
|
|
50
67
|
end
|
|
51
68
|
end
|
|
52
69
|
|
|
@@ -60,6 +77,11 @@ module HTS
|
|
|
60
77
|
get(key, :float)
|
|
61
78
|
end
|
|
62
79
|
|
|
80
|
+
# For compatibility with HTS.cr.
|
|
81
|
+
def get_int64(key)
|
|
82
|
+
get(key, :int64)
|
|
83
|
+
end
|
|
84
|
+
|
|
63
85
|
# For compatibility with HTS.cr.
|
|
64
86
|
def get_string(key)
|
|
65
87
|
get(key, :string)
|
|
@@ -74,6 +96,194 @@ module HTS
|
|
|
74
96
|
get(key)
|
|
75
97
|
end
|
|
76
98
|
|
|
99
|
+
# Set INFO field value with automatic type detection.
|
|
100
|
+
# @param key [String] INFO tag name
|
|
101
|
+
# @param value [Integer, Float, String, Array, true, false, nil] value to set
|
|
102
|
+
# - Integer or Array<Integer> -> update_int
|
|
103
|
+
# - Float or Array<Float,Integer> -> update_float
|
|
104
|
+
# - String -> update_string
|
|
105
|
+
# - true/false -> update_flag
|
|
106
|
+
# - nil -> delete the INFO field
|
|
107
|
+
def []=(key, value)
|
|
108
|
+
case value
|
|
109
|
+
when nil
|
|
110
|
+
delete(key)
|
|
111
|
+
when true, false
|
|
112
|
+
update_flag(key, value)
|
|
113
|
+
when Integer
|
|
114
|
+
unless int32_range?(value)
|
|
115
|
+
raise RangeError,
|
|
116
|
+
"Integer out of int32 range for []=. Current htslib backend does not support int64 INFO update."
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
update_int(key, [value])
|
|
120
|
+
when Float
|
|
121
|
+
update_float(key, [value])
|
|
122
|
+
when String
|
|
123
|
+
update_string(key, value)
|
|
124
|
+
when Array
|
|
125
|
+
if value.empty?
|
|
126
|
+
raise ArgumentError, "Cannot set INFO field to empty array. Use nil to delete."
|
|
127
|
+
elsif value.all? { |v| v.is_a?(Integer) }
|
|
128
|
+
unless value.all? { |v| int32_range?(v) }
|
|
129
|
+
raise RangeError,
|
|
130
|
+
"Integer array contains out-of-int32 values for []=. Current htslib backend does not support int64 INFO update."
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
update_int(key, value)
|
|
134
|
+
elsif value.all? { |v| v.is_a?(Numeric) }
|
|
135
|
+
update_float(key, value)
|
|
136
|
+
else
|
|
137
|
+
raise ArgumentError, "INFO array must contain only integers or floats, got: #{value.map(&:class).uniq}"
|
|
138
|
+
end
|
|
139
|
+
else
|
|
140
|
+
raise ArgumentError, "Unsupported INFO value type: #{value.class}"
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Update INFO field with integer value(s).
|
|
145
|
+
# For compatibility with HTS.cr.
|
|
146
|
+
# @param key [String] INFO tag name
|
|
147
|
+
# @param values [Array<Integer>] integer values (use single-element array for scalar)
|
|
148
|
+
def update_int(key, values)
|
|
149
|
+
values = Array(values)
|
|
150
|
+
ptr = FFI::MemoryPointer.new(:int32, values.size)
|
|
151
|
+
ptr.write_array_of_int32(values)
|
|
152
|
+
ret = LibHTS.bcf_update_info(
|
|
153
|
+
@record.header.struct,
|
|
154
|
+
@record.struct,
|
|
155
|
+
key,
|
|
156
|
+
ptr,
|
|
157
|
+
values.size,
|
|
158
|
+
LibHTS::BCF_HT_INT
|
|
159
|
+
)
|
|
160
|
+
raise InfoUpdateError, "Failed to update INFO int field '#{key}': #{ret}" if ret < 0
|
|
161
|
+
|
|
162
|
+
ret
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Update INFO field with int64 value(s).
|
|
166
|
+
# @note int64 INFO values are primarily relevant for VCF output.
|
|
167
|
+
# @param key [String] INFO tag name
|
|
168
|
+
# @param values [Array<Integer>] integer values (use single-element array for scalar)
|
|
169
|
+
def update_int64(_key, _values)
|
|
170
|
+
raise UnsupportedInfoOperationError, "htslib backend does not implement int64 INFO update (BCF_HT_LONG)"
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Update INFO field with float value(s).
|
|
174
|
+
# For compatibility with HTS.cr.
|
|
175
|
+
# @param key [String] INFO tag name
|
|
176
|
+
# @param values [Array<Float>] float values (use single-element array for scalar)
|
|
177
|
+
def update_float(key, values)
|
|
178
|
+
values = Array(values).map(&:to_f)
|
|
179
|
+
ptr = FFI::MemoryPointer.new(:float, values.size)
|
|
180
|
+
ptr.write_array_of_float(values)
|
|
181
|
+
ret = LibHTS.bcf_update_info(
|
|
182
|
+
@record.header.struct,
|
|
183
|
+
@record.struct,
|
|
184
|
+
key,
|
|
185
|
+
ptr,
|
|
186
|
+
values.size,
|
|
187
|
+
LibHTS::BCF_HT_REAL
|
|
188
|
+
)
|
|
189
|
+
raise InfoUpdateError, "Failed to update INFO float field '#{key}': #{ret}" if ret < 0
|
|
190
|
+
|
|
191
|
+
ret
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Update INFO field with string value.
|
|
195
|
+
# For compatibility with HTS.cr.
|
|
196
|
+
# @param key [String] INFO tag name
|
|
197
|
+
# @param value [String] string value
|
|
198
|
+
def update_string(key, value)
|
|
199
|
+
ret = LibHTS.bcf_update_info(
|
|
200
|
+
@record.header.struct,
|
|
201
|
+
@record.struct,
|
|
202
|
+
key,
|
|
203
|
+
value.to_s,
|
|
204
|
+
1,
|
|
205
|
+
LibHTS::BCF_HT_STR
|
|
206
|
+
)
|
|
207
|
+
raise InfoUpdateError, "Failed to update INFO string field '#{key}': #{ret}" if ret < 0
|
|
208
|
+
|
|
209
|
+
ret
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Update INFO flag field.
|
|
213
|
+
# For compatibility with HTS.cr.
|
|
214
|
+
# @param key [String] INFO tag name
|
|
215
|
+
# @param present [Boolean] true to set flag, false to remove it
|
|
216
|
+
def update_flag(key, present = true)
|
|
217
|
+
ret = if present
|
|
218
|
+
LibHTS.bcf_update_info(
|
|
219
|
+
@record.header.struct,
|
|
220
|
+
@record.struct,
|
|
221
|
+
key,
|
|
222
|
+
FFI::Pointer::NULL,
|
|
223
|
+
1,
|
|
224
|
+
LibHTS::BCF_HT_FLAG
|
|
225
|
+
)
|
|
226
|
+
else
|
|
227
|
+
# Remove flag by setting n=0
|
|
228
|
+
LibHTS.bcf_update_info(
|
|
229
|
+
@record.header.struct,
|
|
230
|
+
@record.struct,
|
|
231
|
+
key,
|
|
232
|
+
FFI::Pointer::NULL,
|
|
233
|
+
0,
|
|
234
|
+
LibHTS::BCF_HT_FLAG
|
|
235
|
+
)
|
|
236
|
+
end
|
|
237
|
+
raise InfoUpdateError, "Failed to update INFO flag field '#{key}': #{ret}" if ret < 0
|
|
238
|
+
|
|
239
|
+
ret
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Delete an INFO field.
|
|
243
|
+
# @param key [String] INFO tag name
|
|
244
|
+
# @return [Boolean] true if field was deleted, false if it didn't exist
|
|
245
|
+
def delete(key)
|
|
246
|
+
type = get_info_type(key)
|
|
247
|
+
return false if type.nil?
|
|
248
|
+
return false unless key?(key)
|
|
249
|
+
|
|
250
|
+
# Delete by setting n=0
|
|
251
|
+
ret = LibHTS.bcf_update_info(
|
|
252
|
+
@record.header.struct,
|
|
253
|
+
@record.struct,
|
|
254
|
+
key,
|
|
255
|
+
FFI::Pointer::NULL,
|
|
256
|
+
0,
|
|
257
|
+
type
|
|
258
|
+
)
|
|
259
|
+
return false if ret < 0
|
|
260
|
+
|
|
261
|
+
true
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# Check if an INFO field exists.
|
|
265
|
+
# @param key [String] INFO tag name
|
|
266
|
+
# @return [Boolean] true if the field exists
|
|
267
|
+
def key?(key)
|
|
268
|
+
type = header_info_type(key)
|
|
269
|
+
return false if type.nil?
|
|
270
|
+
|
|
271
|
+
ndst = FFI::MemoryPointer.new(:int)
|
|
272
|
+
ndst.write_int(0)
|
|
273
|
+
dst_ptr = FFI::MemoryPointer.new(:pointer)
|
|
274
|
+
dst_ptr.write_pointer(FFI::Pointer::NULL)
|
|
275
|
+
|
|
276
|
+
ret = LibHTS.bcf_get_info_values(@record.header.struct, @record.struct, key, dst_ptr, ndst, type)
|
|
277
|
+
type == LibHTS::BCF_HT_FLAG ? ret == 1 : ret >= 0
|
|
278
|
+
ensure
|
|
279
|
+
if dst_ptr
|
|
280
|
+
dst = dst_ptr.read_pointer
|
|
281
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
282
|
+
end
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
alias include? key?
|
|
286
|
+
|
|
77
287
|
# FIXME: naming? room for improvement.
|
|
78
288
|
def fields
|
|
79
289
|
keys.map do |key|
|
|
@@ -119,16 +329,15 @@ module HTS
|
|
|
119
329
|
end
|
|
120
330
|
|
|
121
331
|
def get_info_type(key)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
nil
|
|
332
|
+
header_info_type(key)
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
def header_info_type(key)
|
|
336
|
+
id = LibHTS.bcf_hdr_id2int(@record.header.struct, LibHTS::BCF_DT_ID, key)
|
|
337
|
+
return nil if id.negative?
|
|
338
|
+
return nil unless LibHTS.bcf_hdr_idinfo_exists(@record.header.struct, LibHTS::BCF_HL_INFO, id)
|
|
339
|
+
|
|
340
|
+
LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, id)
|
|
132
341
|
end
|
|
133
342
|
|
|
134
343
|
def ht_type_to_sym(t)
|
|
@@ -137,7 +346,39 @@ module HTS
|
|
|
137
346
|
when LibHTS::BCF_HT_INT then :int
|
|
138
347
|
when LibHTS::BCF_HT_REAL then :float
|
|
139
348
|
when LibHTS::BCF_HT_STR then :string
|
|
140
|
-
when LibHTS::BCF_HT_LONG then :
|
|
349
|
+
when LibHTS::BCF_HT_LONG then :int64
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
def int32_range?(value)
|
|
354
|
+
value >= -2_147_483_648 && value <= 2_147_483_647
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
def info_type_compatible?(actual_type, requested_type)
|
|
358
|
+
case requested_type
|
|
359
|
+
when :int, :int32
|
|
360
|
+
actual_type == :int
|
|
361
|
+
when :int64, :long
|
|
362
|
+
%i[int int64].include?(actual_type)
|
|
363
|
+
when :float, :real
|
|
364
|
+
actual_type == :float
|
|
365
|
+
when :flag, :bool
|
|
366
|
+
actual_type == :flag
|
|
367
|
+
when :string, :str
|
|
368
|
+
actual_type == :string
|
|
369
|
+
else
|
|
370
|
+
actual_type == requested_type
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
def type_label(type)
|
|
375
|
+
case type.to_sym
|
|
376
|
+
when :int, :int32 then "integer"
|
|
377
|
+
when :int64, :long then "integer"
|
|
378
|
+
when :float, :real then "float"
|
|
379
|
+
when :flag, :bool then "flag"
|
|
380
|
+
when :string, :str then "string"
|
|
381
|
+
else type.to_s
|
|
141
382
|
end
|
|
142
383
|
end
|
|
143
384
|
end
|
data/lib/hts/bcf/record.rb
CHANGED
|
@@ -54,11 +54,11 @@ module HTS
|
|
|
54
54
|
end
|
|
55
55
|
|
|
56
56
|
def id=(id)
|
|
57
|
-
LibHTS.bcf_update_id(@header, @bcf1, id)
|
|
57
|
+
LibHTS.bcf_update_id(@header.struct, @bcf1, id)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
def clear_id
|
|
61
|
-
LibHTS.bcf_update_id(@header, @bcf1, ".")
|
|
61
|
+
LibHTS.bcf_update_id(@header.struct, @bcf1, ".")
|
|
62
62
|
end
|
|
63
63
|
|
|
64
64
|
def ref
|
|
@@ -100,7 +100,7 @@ module HTS
|
|
|
100
100
|
when 1
|
|
101
101
|
id = d[:flt].read_int
|
|
102
102
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, id)
|
|
103
|
-
when 2..
|
|
103
|
+
when 2..
|
|
104
104
|
d[:flt].get_array_of_int(0, n_flt).map do |i|
|
|
105
105
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
|
106
106
|
end
|
|
@@ -130,9 +130,13 @@ module HTS
|
|
|
130
130
|
|
|
131
131
|
def to_s
|
|
132
132
|
ksr = LibHTS::KString.new
|
|
133
|
-
|
|
133
|
+
begin
|
|
134
|
+
raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
|
|
134
135
|
|
|
135
|
-
|
|
136
|
+
ksr.read_string_copy
|
|
137
|
+
ensure
|
|
138
|
+
ksr.free_buffer
|
|
139
|
+
end
|
|
136
140
|
end
|
|
137
141
|
|
|
138
142
|
private
|