htslib 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +23 -1
- data/lib/hts/bam/auxi.rb +228 -19
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +119 -36
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +85 -102
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +34 -2
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +10 -5
- data/lib/hts/version.rb +1 -1
- metadata +4 -4
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/bcf/header.rb
CHANGED
|
@@ -8,6 +8,19 @@ module HTS
|
|
|
8
8
|
# NOTE: This class has a lot of methods that are not stable.
|
|
9
9
|
# The method names and the number of arguments may change in the future.
|
|
10
10
|
class Header
|
|
11
|
+
BCF_TYPE_MAP = {
|
|
12
|
+
int: "Integer",
|
|
13
|
+
integer: "Integer",
|
|
14
|
+
int32: "Integer",
|
|
15
|
+
float: "Float",
|
|
16
|
+
real: "Float",
|
|
17
|
+
string: "String",
|
|
18
|
+
str: "String",
|
|
19
|
+
character: "Character",
|
|
20
|
+
char: "Character",
|
|
21
|
+
flag: "Flag"
|
|
22
|
+
}.freeze
|
|
23
|
+
|
|
11
24
|
def initialize(arg = nil)
|
|
12
25
|
case arg
|
|
13
26
|
when LibHTS::HtsFile
|
|
@@ -20,6 +33,12 @@ module HTS
|
|
|
20
33
|
raise TypeError, "Invalid argument"
|
|
21
34
|
end
|
|
22
35
|
|
|
36
|
+
@sync_depth = 0
|
|
37
|
+
@sync_needed = false
|
|
38
|
+
@subset_samples = nil
|
|
39
|
+
@subset_imap = nil
|
|
40
|
+
@subset_imap_pointer = nil
|
|
41
|
+
|
|
23
42
|
yield self if block_given?
|
|
24
43
|
end
|
|
25
44
|
|
|
@@ -36,13 +55,34 @@ module HTS
|
|
|
36
55
|
end
|
|
37
56
|
|
|
38
57
|
def set_version(version)
|
|
39
|
-
LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
|
|
58
|
+
rc = LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
|
|
59
|
+
raise "Failed to set VCF header version" if rc.negative?
|
|
60
|
+
|
|
61
|
+
mark_sync_needed!
|
|
62
|
+
sync_if_needed!
|
|
63
|
+
self
|
|
40
64
|
end
|
|
41
65
|
|
|
42
66
|
def nsamples
|
|
43
67
|
LibHTS.bcf_hdr_nsamples(@bcf_hdr)
|
|
44
68
|
end
|
|
45
69
|
|
|
70
|
+
def target_count
|
|
71
|
+
target_names.size
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def get_tid(name)
|
|
75
|
+
name2id(name)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def target_name(rid)
|
|
79
|
+
id2name(rid)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def target_names
|
|
83
|
+
seqnames
|
|
84
|
+
end
|
|
85
|
+
|
|
46
86
|
def samples
|
|
47
87
|
# bcf_hdr_id2name is macro function
|
|
48
88
|
@bcf_hdr[:samples]
|
|
@@ -50,17 +90,65 @@ module HTS
|
|
|
50
90
|
.map(&:read_string)
|
|
51
91
|
end
|
|
52
92
|
|
|
93
|
+
attr_reader :subset_samples, :subset_imap_pointer
|
|
94
|
+
|
|
95
|
+
# Whether subset state has been recorded on this header.
#
# @return [Boolean] false while no subset index map is stored, true otherwise
def subset?
  return false if @subset_imap.nil?

  true
end
|
|
98
|
+
|
|
99
|
+
# Number of samples recorded in the subset state.
#
# @return [Integer] 0 when the header is not a subset, otherwise the
#   length of the stored subset sample list
def subset_sample_count
  return 0 unless subset?

  @subset_samples.length
end
|
|
102
|
+
|
|
103
|
+
# Build a new header containing only the requested samples.
#
# The sample list is normalized and validated first (duplicates and
# unknown names are rejected by validate_subset_samples!), then handed to
# LibHTS.bcf_hdr_subset. The index map filled in by that call is composed
# with any index map already stored on this header before being recorded
# on the returned header.
#
# @param samples [String, Array<#to_s>, nil] sample name(s) to keep
# @return [Header] a new header wrapping the subset native header
# @raise [SubsetError] if the native call returns a NULL header
def subset(samples)
  subset_samples = normalize_subset_samples(samples)
  validate_subset_samples!(subset_samples)

  imap_pointer = nil
  if subset_samples.empty?
    # No samples requested: no name array and no index map are passed.
    subset_hdr = LibHTS.bcf_hdr_subset(@bcf_hdr, 0, ::FFI::Pointer::NULL, ::FFI::Pointer::NULL)
  else
    # The per-name buffers stay referenced by this local for the duration
    # of the native call below.
    encoded_samples = subset_samples.map { |name| FFI::MemoryPointer.from_string(name) }
    sample_pointers = FFI::MemoryPointer.new(:pointer, subset_samples.length)
    sample_pointers.write_array_of_pointer(encoded_samples)
    imap_pointer = FFI::MemoryPointer.new(:int, subset_samples.length)
    subset_hdr = LibHTS.bcf_hdr_subset(@bcf_hdr, subset_samples.length, sample_pointers, imap_pointer)
  end

  raise SubsetError, "Failed to subset BCF header samples #{subset_samples.inspect}" if subset_hdr.to_ptr.null?

  composed_imap = compose_subset_imap(read_subset_imap(imap_pointer, subset_samples.length))
  self.class.new(subset_hdr).tap do |header|
    header.send(:set_subset_state, subset_samples, composed_imap)
  end
end
|
|
126
|
+
|
|
53
127
|
def add_sample(sample, sync: true)
|
|
54
|
-
LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
|
|
55
|
-
|
|
128
|
+
rc = LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
|
|
129
|
+
raise "Failed to add sample #{sample}" if rc.negative?
|
|
130
|
+
|
|
131
|
+
mark_sync_needed!
|
|
132
|
+
sync_if_needed! if sync
|
|
133
|
+
self
|
|
56
134
|
end
|
|
57
135
|
|
|
58
136
|
def merge(hdr)
|
|
59
|
-
LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
|
|
137
|
+
merged = LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
|
|
138
|
+
raise "Failed to merge BCF headers" if merged.to_ptr.null?
|
|
139
|
+
|
|
140
|
+
@bcf_hdr = merged
|
|
141
|
+
mark_sync_needed!
|
|
142
|
+
sync_if_needed!
|
|
143
|
+
self
|
|
60
144
|
end
|
|
61
145
|
|
|
62
146
|
def sync
|
|
63
|
-
LibHTS.bcf_hdr_sync(@bcf_hdr)
|
|
147
|
+
rc = LibHTS.bcf_hdr_sync(@bcf_hdr)
|
|
148
|
+
raise "Failed to sync BCF header" if rc.negative?
|
|
149
|
+
|
|
150
|
+
@sync_needed = false
|
|
151
|
+
self
|
|
64
152
|
end
|
|
65
153
|
|
|
66
154
|
def read_bcf(fname)
|
|
@@ -68,32 +156,121 @@ module HTS
|
|
|
68
156
|
end
|
|
69
157
|
|
|
70
158
|
def append(line)
|
|
71
|
-
LibHTS.bcf_hdr_append(@bcf_hdr, line)
|
|
159
|
+
rc = LibHTS.bcf_hdr_append(@bcf_hdr, line)
|
|
160
|
+
raise "Failed to append VCF header line" if rc.negative?
|
|
161
|
+
|
|
162
|
+
mark_sync_needed!
|
|
163
|
+
self
|
|
72
164
|
end
|
|
73
165
|
|
|
74
|
-
def delete(bcf_hl_type, key) # FIXME
|
|
166
|
+
def delete(bcf_hl_type, key = nil) # FIXME
|
|
167
|
+
existed = hrec_exists?(bcf_hl_type, key)
|
|
75
168
|
type = bcf_hl_type_to_int(bcf_hl_type)
|
|
76
169
|
LibHTS.bcf_hdr_remove(@bcf_hdr, type, key)
|
|
170
|
+
mark_sync_needed! if existed
|
|
171
|
+
existed
|
|
77
172
|
end
|
|
78
173
|
|
|
79
174
|
def get_hrec(bcf_hl_type, key, value, str_class = nil)
|
|
80
175
|
type = bcf_hl_type_to_int(bcf_hl_type)
|
|
81
|
-
hrec =
|
|
82
|
-
|
|
176
|
+
hrec = borrowed_hrec(type, key, value, str_class)
|
|
177
|
+
return nil if hrec.to_ptr.null?
|
|
178
|
+
|
|
179
|
+
HeaderRecord.new(owned_hrec(hrec))
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Run a block of header modifications with synchronisation deferred.
#
# The nesting counter is raised for the duration of the block, and
# sync_if_needed! is invoked once the counter has been lowered again,
# whether or not the block raised.
#
# @yieldparam header [Header] this header
# @return [Header] self
def edit
  @sync_depth += 1
  begin
    yield self
  ensure
    @sync_depth -= 1
    sync_if_needed!
  end
  self
end
|
|
190
|
+
|
|
191
|
+
# Append a structured "contig" header line.
#
# @param id [#to_s] value for the ID field
# @param length [#to_s, nil] value for the length field; omitted when nil
# @param attributes [Hash] extra fields, passed through normalize_meta_attributes
# @return whatever append_structured_meta returns
def add_contig(id, length: nil, **attributes)
  leading = length.nil? ? [["ID", id.to_s]] : [["ID", id.to_s], ["length", length.to_s]]
  append_structured_meta("contig", leading + normalize_meta_attributes(attributes))
end
|
|
197
|
+
|
|
198
|
+
def remove_contig(id)
|
|
199
|
+
delete("CONTIG", id.to_s).tap { sync_if_needed! }
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def add_filter(id, description:, **attributes)
|
|
203
|
+
fields = [["ID", id.to_s], ["Description", description.to_s]]
|
|
204
|
+
fields.concat normalize_meta_attributes(attributes)
|
|
205
|
+
append_structured_meta("FILTER", fields)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def remove_filter(id)
|
|
209
|
+
delete("FILTER", id.to_s).tap { sync_if_needed! }
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def add_info(id, number:, type:, description:, **attributes)
|
|
213
|
+
fields = [["ID", id.to_s], ["Number", normalize_bcf_number(number)], ["Type", normalize_bcf_type(type)],
|
|
214
|
+
["Description", description.to_s]]
|
|
215
|
+
fields.concat normalize_meta_attributes(attributes)
|
|
216
|
+
append_structured_meta("INFO", fields)
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def update_info(id, number:, type:, description:, **attributes)
|
|
220
|
+
delete("INFO", id.to_s)
|
|
221
|
+
add_info(id, number:, type:, description:, **attributes)
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
def remove_info(id)
|
|
225
|
+
delete("INFO", id.to_s).tap { sync_if_needed! }
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def add_format(id, number:, type:, description:, **attributes)
|
|
229
|
+
fields = [["ID", id.to_s], ["Number", normalize_bcf_number(number)], ["Type", normalize_bcf_type(type)],
|
|
230
|
+
["Description", description.to_s]]
|
|
231
|
+
fields.concat normalize_meta_attributes(attributes)
|
|
232
|
+
append_structured_meta("FORMAT", fields)
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
def update_format(id, number:, type:, description:, **attributes)
|
|
236
|
+
delete("FORMAT", id.to_s)
|
|
237
|
+
add_format(id, number:, type:, description:, **attributes)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
def remove_format(id)
|
|
241
|
+
delete("FORMAT", id.to_s).tap { sync_if_needed! }
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Append a generic meta line.
#
# Without keyword attributes a plain "##key=value" line is appended and
# the header is synchronised if needed. With keyword attributes a
# structured line is appended instead; +value+ is not used in that case.
#
# @param key [#to_s] meta key
# @param value [#to_s, nil] value for the plain form
# @param attributes [Hash] fields for the structured form
# @return [Header] self for the plain form; the result of
#   append_structured_meta for the structured form
def add_meta(key, value = nil, **attributes)
  return append_structured_meta(key.to_s, normalize_meta_attributes(attributes)) unless attributes.empty?

  append("###{key}=#{value}")
  sync_if_needed!
  self
end
|
|
84
253
|
|
|
85
254
|
def seqnames
|
|
86
255
|
n = FFI::MemoryPointer.new(:int)
|
|
87
256
|
names = LibHTS.bcf_hdr_seqnames(@bcf_hdr, n)
|
|
88
|
-
|
|
89
|
-
|
|
257
|
+
begin
|
|
258
|
+
names.read_array_of_pointer(n.read_int)
|
|
259
|
+
.map(&:read_string)
|
|
260
|
+
ensure
|
|
261
|
+
LibHTS.hts_free(names) unless names.null?
|
|
262
|
+
end
|
|
90
263
|
end
|
|
91
264
|
|
|
92
265
|
def to_s
|
|
93
266
|
kstr = LibHTS::KString.new
|
|
94
|
-
|
|
267
|
+
begin
|
|
268
|
+
raise "Failed to get header string" if LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr).negative?
|
|
95
269
|
|
|
96
|
-
|
|
270
|
+
kstr.read_string_copy
|
|
271
|
+
ensure
|
|
272
|
+
kstr.free_buffer
|
|
273
|
+
end
|
|
97
274
|
end
|
|
98
275
|
|
|
99
276
|
def name2id(name)
|
|
@@ -106,6 +283,82 @@ module HTS
|
|
|
106
283
|
|
|
107
284
|
private
|
|
108
285
|
|
|
286
|
+
# Translate a type alias into the spelling stored in BCF_TYPE_MAP.
#
# @param type [#to_sym, #to_s] alias such as :int or "flag"
# @return [String] the mapped name, or type.to_s when there is no mapping
def normalize_bcf_type(type)
  BCF_TYPE_MAP.fetch(type.to_sym) { type.to_s }
end
|
|
289
|
+
|
|
290
|
+
# Translate a Number specifier into its header spelling.
#
# Only the listed symbols are mapped; every other value (integers,
# strings, other symbols) is returned as number.to_s.
#
# @param number [Symbol, Integer, String]
# @return [String]
def normalize_bcf_number(number)
  aliases = {
    a: "A", A: "A",
    r: "R", R: "R",
    g: "G", G: "G",
    variable: ".", var: ".", dot: "."
  }
  aliases.fetch(number) { number.to_s }
end
|
|
299
|
+
|
|
300
|
+
# Convert keyword attributes into [key, value] string pairs.
#
# Keys are split on "_" and every segment after the first is capitalized
# (so :source_file becomes "sourceFile"). Array values are joined with
# ","; any other value is converted with to_s.
#
# @param attributes [Hash]
# @return [Array<Array(String, String)>]
def normalize_meta_attributes(attributes)
  attributes.map do |key, value|
    first, *others = key.to_s.split("_")
    name = [first, *others.map(&:capitalize)].join
    text = case value
           when Array then value.join(",")
           else value.to_s
           end
    [name, text]
  end
end
|
|
307
|
+
|
|
308
|
+
# Append a structured header line of the form "##label=<k=v,k=v>".
#
# Each value is rendered by format_meta_value before the line is
# appended; the header is then synchronised if needed.
#
# @param label [String] text placed after "##"
# @param fields [Array<Array(String, String)>] ordered key/value pairs
# @return [Header] self
def append_structured_meta(label, fields)
  rendered = fields.map do |key, value|
    "#{key}=#{format_meta_value(key, value)}"
  end
  append("###{label}=<#{rendered.join(",")}>")
  sync_if_needed!
  self
end
|
|
314
|
+
|
|
315
|
+
# Render one structured-header value.
#
# A "Description" value is always quoted. Any other value is emitted bare
# only when it consists solely of alphanumerics and the characters
# "_ . : + -"; otherwise it is quoted as well.
#
# @param key [String] field name
# @param value [String] field value
# @return [String]
def format_meta_value(key, value)
  bare = key != "Description" && value.match?(/\A[[:alnum:]_.:+-]+\z/)
  bare ? value : quote_meta_value(value)
end
|
|
321
|
+
|
|
322
|
+
# Wrap a header value in double quotes, escaping embedded backslashes and
# double quotes with a preceding backslash.
#
# The block form of gsub is used on purpose: in a replacement *string*
# a doubled backslash is itself an escape, so a pattern such as '\\\\1'
# yields a literal backslash followed by the digit 1 rather than the
# matched character.
#
# @param value [String] raw value
# @return [String] quoted, escaped value
def quote_meta_value(value)
  escaped = value.gsub(/[\\"]/) { |char| "\\#{char}" }
  %("#{escaped}")
end
|
|
325
|
+
|
|
326
|
+
def mark_sync_needed!
|
|
327
|
+
@sync_needed = true
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
# Call sync when a change is pending and the nesting counter is zero.
#
# @return the result of sync, or nil when nothing was done
def sync_if_needed!
  return unless @sync_needed
  return unless @sync_depth.zero?

  sync
end
|
|
333
|
+
|
|
334
|
+
# Check whether a header record of the given type and key is present.
#
# @param bcf_hl_type [Integer, Object] header line type, converted with bcf_hl_type_to_int
# @param key [String, nil] lookup key
# @return [Boolean] true when the looked-up record pointer is not NULL
def hrec_exists?(bcf_hl_type, key)
  type = bcf_hl_type_to_int(bcf_hl_type)
  found = borrowed_hrec(type, *hrec_lookup_args(type, key))
  !found.to_ptr.null?
end
|
|
340
|
+
|
|
341
|
+
def borrowed_hrec(type, key, value, str_class)
|
|
342
|
+
LibHTS.bcf_hdr_get_hrec(@bcf_hdr, type, key, value, str_class)
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
# Duplicate a header record with LibHTS.bcf_hrec_dup.
#
# @param hrec [Object] record to duplicate
# @return [Object] the duplicate
# @raise [RuntimeError] if the duplicate's pointer is NULL
def owned_hrec(hrec)
  copy = LibHTS.bcf_hrec_dup(hrec)
  raise "Failed to duplicate BCF header record" if copy.to_ptr.null?

  copy
end
|
|
350
|
+
|
|
351
|
+
# Build the [key, value, str_class] arguments used to look up a header record.
#
# Generic lines (BCF_HL_GEN) are looked up by their own key; every other
# type is looked up by its "ID" field.
#
# @param type [Integer] header line type constant
# @param key [String, nil]
# @return [Array] three-element argument list
def hrec_lookup_args(type, key)
  return [key, nil, nil] if type == LibHTS::BCF_HL_GEN

  ["ID", key, nil]
end
|
|
361
|
+
|
|
109
362
|
def bcf_hl_type_to_int(bcf_hl_type)
|
|
110
363
|
return bcf_hl_type if bcf_hl_type.is_a?(Integer)
|
|
111
364
|
|
|
@@ -129,6 +382,60 @@ module HTS
|
|
|
129
382
|
|
|
130
383
|
def initialize_copy(orig)
|
|
131
384
|
@bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
|
|
385
|
+
@sync_depth = 0
|
|
386
|
+
@sync_needed = false
|
|
387
|
+
set_subset_state(orig.subset_samples, orig.send(:subset_imap))
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
protected
|
|
391
|
+
|
|
392
|
+
attr_reader :subset_imap
|
|
393
|
+
|
|
394
|
+
# Record subset bookkeeping on this header.
#
# Both arguments are copied (or left nil) before being stored, and the
# pointer form of the index map is rebuilt from the stored copy.
#
# @param samples [Array<String>, nil]
# @param imap [Array<Integer>, nil]
# @return the value returned by build_subset_imap_pointer
def set_subset_state(samples, imap)
  @subset_samples = samples.nil? ? nil : samples.dup
  @subset_imap = imap.nil? ? nil : imap.dup
  @subset_imap_pointer = build_subset_imap_pointer(@subset_imap)
end
|
|
399
|
+
|
|
400
|
+
private
|
|
401
|
+
|
|
402
|
+
# Coerce the argument of #subset into an array of sample names.
#
# A String becomes a one-element array; anything else goes through
# Array() and each element is converted with to_s.
#
# @param samples [String, Array, nil, Object]
# @return [Array<String>]
# @raise [SubsetError] when the coercion raises TypeError
def normalize_subset_samples(samples)
  return [samples] if samples.is_a?(String)

  Array(samples).map(&:to_s)
rescue TypeError
  raise SubsetError, "Sample subset must be a String or an Array of sample names"
end
|
|
412
|
+
|
|
413
|
+
# Reject subset requests that contain duplicate or unknown sample names.
#
# @param subset_samples [Array<String>] normalized sample names
# @return [nil]
# @raise [SubsetError] if any name occurs more than once
# @raise [UnknownSampleError] if any name is not among this header's samples
def validate_subset_samples!(subset_samples)
  duplicates = subset_samples.tally.select { |_name, count| count > 1 }.keys
  raise SubsetError, "Duplicate sample names in subset: #{duplicates.join(', ')}" unless duplicates.empty?

  # Read the header's sample list once instead of once per requested name.
  known = samples
  missing = subset_samples.reject { |name| known.include?(name) }
  raise UnknownSampleError, "Unknown sample names: #{missing.join(', ')}" unless missing.empty?
end
|
|
420
|
+
|
|
421
|
+
# Read +length+ ints from +pointer+.
#
# @param pointer [#read_array_of_int, nil] not touched when length is zero
# @param length [Integer]
# @return [Array<Integer>] empty when length is zero
def read_subset_imap(pointer, length)
  length.zero? ? [] : pointer.read_array_of_int(length)
end
|
|
426
|
+
|
|
427
|
+
# Translate indices relative to this header through the stored index map.
#
# When no index map is stored, the identity map over this header's
# samples is used, so the indices are returned unchanged (after a bounds
# check).
#
# @param imap [Array<Integer>]
# @return [Array<Integer>]
# @raise [IndexError] if an index falls outside the base map
def compose_subset_imap(imap)
  parent_map = @subset_imap || (0...samples.length).to_a
  imap.map { |position| parent_map.fetch(position) }
end
|
|
431
|
+
|
|
432
|
+
# Copy an index map into a freshly allocated int buffer.
#
# @param imap [Array<Integer>, nil]
# @return [FFI::MemoryPointer, nil] nil when imap is nil or empty
def build_subset_imap_pointer(imap)
  return nil if !imap || imap.empty?

  buffer = FFI::MemoryPointer.new(:int, imap.length)
  buffer.write_array_of_int(imap)
  buffer
end
|
|
133
440
|
end
|
|
134
441
|
end
|
data/lib/hts/bcf/info.rb
CHANGED
|
@@ -6,7 +6,6 @@ module HTS
|
|
|
6
6
|
class Info
|
|
7
7
|
def initialize(record)
|
|
8
8
|
@record = record
|
|
9
|
-
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
|
10
9
|
end
|
|
11
10
|
|
|
12
11
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
|
@@ -16,37 +15,55 @@ module HTS
|
|
|
16
15
|
# I think they are better than `fetch_int` and `fetch_float`.
|
|
17
16
|
def get(key, type = nil)
|
|
18
17
|
n = FFI::MemoryPointer.new(:int)
|
|
19
|
-
p1 =
|
|
18
|
+
p1 = FFI::MemoryPointer.new(:pointer)
|
|
19
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
20
20
|
h = @record.header.struct
|
|
21
21
|
r = @record.struct
|
|
22
22
|
|
|
23
|
-
info_values = proc do |typ|
|
|
23
|
+
info_values = proc do |typ, reader|
|
|
24
24
|
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
|
|
25
25
|
return nil if ret < 0 # return from method.
|
|
26
26
|
|
|
27
|
-
p1.read_pointer
|
|
27
|
+
dst = p1.read_pointer
|
|
28
|
+
begin
|
|
29
|
+
reader.call(dst, n.read_int)
|
|
30
|
+
ensure
|
|
31
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
32
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
actual_type = ht_type_to_sym(get_info_type(key))
|
|
37
|
+
if type && actual_type && !info_type_compatible?(actual_type, type.to_sym)
|
|
38
|
+
raise InfoTypeError, "Tag #{key} is not #{type_label(type)} INFO field"
|
|
28
39
|
end
|
|
29
40
|
|
|
30
|
-
type ||=
|
|
41
|
+
type ||= actual_type
|
|
42
|
+
return nil if actual_type && !key?(key)
|
|
31
43
|
|
|
32
44
|
case type&.to_sym
|
|
33
45
|
when :int, :int32
|
|
34
|
-
info_values.call(LibHTS::BCF_HT_INT)
|
|
35
|
-
|
|
46
|
+
info_values.call(LibHTS::BCF_HT_INT, ->(dst, len) { dst.read_array_of_int32(len) })
|
|
47
|
+
when :int64, :long
|
|
48
|
+
info_values.call(LibHTS::BCF_HT_LONG, ->(dst, len) { dst.read_array_of_int64(len) })
|
|
36
49
|
when :float, :real
|
|
37
|
-
info_values.call(LibHTS::BCF_HT_REAL)
|
|
38
|
-
.read_array_of_float(n.read_int)
|
|
50
|
+
info_values.call(LibHTS::BCF_HT_REAL, ->(dst, len) { dst.read_array_of_float(len) })
|
|
39
51
|
when :flag, :bool
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
begin
|
|
53
|
+
case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
|
|
54
|
+
when 1 then true
|
|
55
|
+
when 0 then false
|
|
56
|
+
when -1 then nil
|
|
57
|
+
else
|
|
58
|
+
raise InfoReadError, "Unknown return value from bcf_get_info_flag: #{ret}"
|
|
59
|
+
end
|
|
60
|
+
ensure
|
|
61
|
+
dst = p1.read_pointer
|
|
62
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
63
|
+
p1.write_pointer(FFI::Pointer::NULL)
|
|
46
64
|
end
|
|
47
65
|
when :string, :str
|
|
48
|
-
info_values.call(LibHTS::BCF_HT_STR)
|
|
49
|
-
.read_string
|
|
66
|
+
info_values.call(LibHTS::BCF_HT_STR, ->(dst, _len) { dst.read_string })
|
|
50
67
|
end
|
|
51
68
|
end
|
|
52
69
|
|
|
@@ -60,6 +77,11 @@ module HTS
|
|
|
60
77
|
get(key, :float)
|
|
61
78
|
end
|
|
62
79
|
|
|
80
|
+
# For compatibility with HTS.cr.
|
|
81
|
+
def get_int64(key)
|
|
82
|
+
get(key, :int64)
|
|
83
|
+
end
|
|
84
|
+
|
|
63
85
|
# For compatibility with HTS.cr.
|
|
64
86
|
def get_string(key)
|
|
65
87
|
get(key, :string)
|
|
@@ -89,6 +111,11 @@ module HTS
|
|
|
89
111
|
when true, false
|
|
90
112
|
update_flag(key, value)
|
|
91
113
|
when Integer
|
|
114
|
+
unless int32_range?(value)
|
|
115
|
+
raise RangeError,
|
|
116
|
+
"Integer out of int32 range for []=. Current htslib backend does not support int64 INFO update."
|
|
117
|
+
end
|
|
118
|
+
|
|
92
119
|
update_int(key, [value])
|
|
93
120
|
when Float
|
|
94
121
|
update_float(key, [value])
|
|
@@ -98,6 +125,11 @@ module HTS
|
|
|
98
125
|
if value.empty?
|
|
99
126
|
raise ArgumentError, "Cannot set INFO field to empty array. Use nil to delete."
|
|
100
127
|
elsif value.all? { |v| v.is_a?(Integer) }
|
|
128
|
+
unless value.all? { |v| int32_range?(v) }
|
|
129
|
+
raise RangeError,
|
|
130
|
+
"Integer array contains out-of-int32 values for []=. Current htslib backend does not support int64 INFO update."
|
|
131
|
+
end
|
|
132
|
+
|
|
101
133
|
update_int(key, value)
|
|
102
134
|
elsif value.all? { |v| v.is_a?(Numeric) }
|
|
103
135
|
update_float(key, value)
|
|
@@ -125,11 +157,19 @@ module HTS
|
|
|
125
157
|
values.size,
|
|
126
158
|
LibHTS::BCF_HT_INT
|
|
127
159
|
)
|
|
128
|
-
raise "Failed to update INFO int field '#{key}': #{ret}" if ret < 0
|
|
160
|
+
raise InfoUpdateError, "Failed to update INFO int field '#{key}': #{ret}" if ret < 0
|
|
129
161
|
|
|
130
162
|
ret
|
|
131
163
|
end
|
|
132
164
|
|
|
165
|
+
# Update INFO field with int64 value(s).
|
|
166
|
+
# @note int64 INFO values are primarily relevant for VCF output.
|
|
167
|
+
# @param key [String] INFO tag name
|
|
168
|
+
# @param values [Array<Integer>] integer values (use single-element array for scalar)
|
|
169
|
+
def update_int64(_key, _values)
|
|
170
|
+
raise UnsupportedInfoOperationError, "htslib backend does not implement int64 INFO update (BCF_HT_LONG)"
|
|
171
|
+
end
|
|
172
|
+
|
|
133
173
|
# Update INFO field with float value(s).
|
|
134
174
|
# For compatibility with HTS.cr.
|
|
135
175
|
# @param key [String] INFO tag name
|
|
@@ -146,7 +186,7 @@ module HTS
|
|
|
146
186
|
values.size,
|
|
147
187
|
LibHTS::BCF_HT_REAL
|
|
148
188
|
)
|
|
149
|
-
raise "Failed to update INFO float field '#{key}': #{ret}" if ret < 0
|
|
189
|
+
raise InfoUpdateError, "Failed to update INFO float field '#{key}': #{ret}" if ret < 0
|
|
150
190
|
|
|
151
191
|
ret
|
|
152
192
|
end
|
|
@@ -164,7 +204,7 @@ module HTS
|
|
|
164
204
|
1,
|
|
165
205
|
LibHTS::BCF_HT_STR
|
|
166
206
|
)
|
|
167
|
-
raise "Failed to update INFO string field '#{key}': #{ret}" if ret < 0
|
|
207
|
+
raise InfoUpdateError, "Failed to update INFO string field '#{key}': #{ret}" if ret < 0
|
|
168
208
|
|
|
169
209
|
ret
|
|
170
210
|
end
|
|
@@ -194,7 +234,7 @@ module HTS
|
|
|
194
234
|
LibHTS::BCF_HT_FLAG
|
|
195
235
|
)
|
|
196
236
|
end
|
|
197
|
-
raise "Failed to update INFO flag field '#{key}': #{ret}" if ret < 0
|
|
237
|
+
raise InfoUpdateError, "Failed to update INFO flag field '#{key}': #{ret}" if ret < 0
|
|
198
238
|
|
|
199
239
|
ret
|
|
200
240
|
end
|
|
@@ -203,9 +243,9 @@ module HTS
|
|
|
203
243
|
# @param key [String] INFO tag name
|
|
204
244
|
# @return [Boolean] true if field was deleted, false if it didn't exist
|
|
205
245
|
def delete(key)
|
|
206
|
-
# Try to get current type to check existence
|
|
207
246
|
type = get_info_type(key)
|
|
208
247
|
return false if type.nil?
|
|
248
|
+
return false unless key?(key)
|
|
209
249
|
|
|
210
250
|
# Delete by setting n=0
|
|
211
251
|
ret = LibHTS.bcf_update_info(
|
|
@@ -225,9 +265,21 @@ module HTS
|
|
|
225
265
|
# @param key [String] INFO tag name
|
|
226
266
|
# @return [Boolean] true if the field exists
|
|
227
267
|
def key?(key)
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
268
|
+
type = header_info_type(key)
|
|
269
|
+
return false if type.nil?
|
|
270
|
+
|
|
271
|
+
ndst = FFI::MemoryPointer.new(:int)
|
|
272
|
+
ndst.write_int(0)
|
|
273
|
+
dst_ptr = FFI::MemoryPointer.new(:pointer)
|
|
274
|
+
dst_ptr.write_pointer(FFI::Pointer::NULL)
|
|
275
|
+
|
|
276
|
+
ret = LibHTS.bcf_get_info_values(@record.header.struct, @record.struct, key, dst_ptr, ndst, type)
|
|
277
|
+
type == LibHTS::BCF_HT_FLAG ? ret == 1 : ret >= 0
|
|
278
|
+
ensure
|
|
279
|
+
if dst_ptr
|
|
280
|
+
dst = dst_ptr.read_pointer
|
|
281
|
+
LibHTS.hts_free(dst) unless dst.null?
|
|
282
|
+
end
|
|
231
283
|
end
|
|
232
284
|
|
|
233
285
|
alias include? key?
|
|
@@ -277,16 +329,15 @@ module HTS
|
|
|
277
329
|
end
|
|
278
330
|
|
|
279
331
|
def get_info_type(key)
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
nil
|
|
332
|
+
header_info_type(key)
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
# Look up the declared value type of an INFO tag in the record's header.
#
# NOTE(review): the existence check relies on Ruby truthiness, so
# LibHTS.bcf_hdr_idinfo_exists must return false/nil (not 0) for a missing
# definition - confirm against its implementation.
#
# @param key [String] INFO tag name
# @return [Integer, nil] the value from LibHTS.bcf_hdr_id2type, or nil when
#   the key has no id or no INFO definition in the header
def header_info_type(key)
  hdr = @record.header.struct
  id = LibHTS.bcf_hdr_id2int(hdr, LibHTS::BCF_DT_ID, key)
  return nil if id.negative? || !LibHTS.bcf_hdr_idinfo_exists(hdr, LibHTS::BCF_HL_INFO, id)

  LibHTS.bcf_hdr_id2type(hdr, LibHTS::BCF_HL_INFO, id)
end
|
|
291
342
|
|
|
292
343
|
def ht_type_to_sym(t)
|
|
@@ -295,7 +346,39 @@ module HTS
|
|
|
295
346
|
when LibHTS::BCF_HT_INT then :int
|
|
296
347
|
when LibHTS::BCF_HT_REAL then :float
|
|
297
348
|
when LibHTS::BCF_HT_STR then :string
|
|
298
|
-
when LibHTS::BCF_HT_LONG then :
|
|
349
|
+
when LibHTS::BCF_HT_LONG then :int64
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
# Whether +value+ fits in a signed 32-bit integer.
#
# @param value [Integer]
# @return [Boolean]
def int32_range?(value)
  value.between?(-2_147_483_648, 2_147_483_647)
end
|
|
356
|
+
|
|
357
|
+
# Decide whether a requested read type may be used for a field whose
# header type is +actual_type+.
#
# An int64/long request accepts both :int and :int64 fields; every other
# known alias accepts only its own canonical type. An unrecognised
# request is compatible only with an identical actual type.
#
# @param actual_type [Symbol] canonical type of the field
# @param requested_type [Symbol] type requested by the caller
# @return [Boolean]
def info_type_compatible?(actual_type, requested_type)
  accepted_by_request = {
    int: %i[int], int32: %i[int],
    int64: %i[int int64], long: %i[int int64],
    float: %i[float], real: %i[float],
    flag: %i[flag], bool: %i[flag],
    string: %i[string], str: %i[string]
  }
  accepted = accepted_by_request.fetch(requested_type) { [requested_type] }
  accepted.include?(actual_type)
end
|
|
373
|
+
|
|
374
|
+
# Human-readable name for a requested type, as used in error messages.
#
# @param type [#to_sym, #to_s] requested type alias
# @return [String] "integer", "float", "flag" or "string" for known
#   aliases; type.to_s otherwise
def type_label(type)
  labels = {
    int: "integer", int32: "integer",
    int64: "integer", long: "integer",
    float: "float", real: "float",
    flag: "flag", bool: "flag",
    string: "string", str: "string"
  }
  labels.fetch(type.to_sym) { type.to_s }
end
|
|
301
384
|
end
|